[llvm] [SLP] Buildvector for alternate instructions with non-profitable gather (PR #84978)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 19 13:55:54 PDT 2024
https://github.com/alexey-bataev updated https://github.com/llvm/llvm-project/pull/84978
>From ea1918b5f581f7b189aefcd953abb855c96b3d42 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Tue, 12 Mar 2024 20:28:58 +0000
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 96 ++++++++++++++++++-
.../AArch64/extractelements-to-shuffle.ll | 40 ++++----
.../X86/gather-move-out-of-loop.ll | 10 +-
...gathered-delayed-nodes-with-reused-user.ll | 18 ++--
.../non-scheduled-inst-reused-as-last-inst.ll | 12 +--
.../X86/reorder_with_external_users.ll | 18 ++--
.../X86/vectorize-widest-phis.ll | 10 +-
.../SLPVectorizer/alternate-non-profitable.ll | 36 ++++---
8 files changed, 161 insertions(+), 79 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b8b67609d755fd..085a3b356cf506 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2235,7 +2235,7 @@ class BoUpSLP {
/// of the cost, considered to be good enough score.
std::optional<int>
findBestRootPair(ArrayRef<std::pair<Value *, Value *>> Candidates,
- int Limit = LookAheadHeuristics::ScoreFail) {
+ int Limit = LookAheadHeuristics::ScoreFail) const {
LookAheadHeuristics LookAhead(*TLI, *DL, *SE, *this, /*NumLanes=*/2,
RootLookAheadMaxDepth);
int BestScore = Limit;
@@ -6019,6 +6019,100 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return TreeEntry::NeedToGather;
}
+ // Check that the operand node does not generate a buildvector sequence. If
+ // it does, then it is probably not worth building an alternate shuffle when
+ // the number of buildvector operands + alternate instructions is greater
+ // than the number of buildvector instructions.
+ SmallVector<ValueList> Operands;
+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands())) {
+ Operands.emplace_back();
+ // Prepare the operand vector.
+ for (Value *V : VL)
+ Operands.back().push_back(cast<Instruction>(V)->getOperand(I));
+ }
+ if (Operands.size() == 2) {
+ // Try to find the best operand candidates.
+ for (unsigned I : seq<unsigned>(0, VL.size() - 1)) {
+ SmallVector<std::pair<Value *, Value *>> Candidates(3);
+ Candidates[0] = std::make_pair(Operands[0][I], Operands[0][I + 1]);
+ Candidates[1] = std::make_pair(Operands[0][I], Operands[1][I + 1]);
+ Candidates[2] = std::make_pair(Operands[1][I], Operands[0][I + 1]);
+ std::optional<int> Res = findBestRootPair(Candidates);
+ switch (Res.value_or(0)) {
+ case 0:
+ break;
+ case 1:
+ std::swap(Operands[0][I + 1], Operands[1][I + 1]);
+ break;
+ case 2:
+ std::swap(Operands[0][I], Operands[1][I]);
+ break;
+ default:
+ llvm_unreachable("Unexpected index.");
+ }
+ }
+ }
+ DenseSet<unsigned> UniqueOpcodes;
+ constexpr unsigned NumAltInsts = 3; // main + alt + shuffle.
+ unsigned NonInstCnt = 0;
+ unsigned UndefCnt = 0;
+ unsigned ExtraShuffleInsts = 0;
+ if (Operands.size() == 2) {
+ // Do not count same operands twice.
+ if (Operands.front() == Operands.back()) {
+ Operands.erase(Operands.begin());
+ } else if (!allConstant(Operands.front()) &&
+ all_of(Operands.front(), [&](Value *V) {
+ return is_contained(Operands.back(), V);
+ })) {
+ Operands.erase(Operands.begin());
+ ++ExtraShuffleInsts;
+ }
+ }
+ const Loop *L = LI->getLoopFor(VL0->getParent());
+ if (any_of(Operands,
+ [&](ArrayRef<Value *> Op) {
+ if (allConstant(Op) ||
+ (!isSplat(Op) && allSameBlock(Op) && allSameType(Op) &&
+ getSameOpcode(Op, *TLI).MainOp))
+ return false;
+ DenseMap<Value *, unsigned> Uniques;
+ for (Value *V : Op) {
+ if (isa<Constant, ExtractElementInst>(V) ||
+ getTreeEntry(V) || (L && L->isLoopInvariant(V))) {
+ if (isa<UndefValue>(V))
+ ++UndefCnt;
+ continue;
+ }
+ auto Res = Uniques.try_emplace(V, 0);
+ // Found first duplicate - need to add shuffle.
+ if (!Res.second && Res.first->second == 1)
+ ++ExtraShuffleInsts;
+ ++Res.first->getSecond();
+ if (auto *I = dyn_cast<Instruction>(V))
+ UniqueOpcodes.insert(I->getOpcode());
+ else if (Res.second)
+ ++NonInstCnt;
+ }
+ if (any_of(Uniques, [&](const auto &P) {
+ return P.first->hasNUsesOrMore(P.second + 1) &&
+ none_of(P.first->users(), [&](User *U) {
+ return getTreeEntry(U) || Uniques.contains(U);
+ });
+ }))
+ return false;
+ return true;
+ }) &&
+ (UndefCnt >= (VL.size() - 1) * VL0->getNumOperands() ||
+ (UniqueOpcodes.size() + NonInstCnt + ExtraShuffleInsts +
+ NumAltInsts) >= VL0->getNumOperands() * VL.size())) {
+ LLVM_DEBUG(
+ dbgs()
+ << "SLP: ShuffleVector not vectorized, operands are buildvector and "
+ "the whole alt sequence is not profitable.\n");
+ return TreeEntry::NeedToGather;
+ }
+
return TreeEntry::Vectorize;
}
default:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll
index 44542f32bf145d..5e0cd92caf9258 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll
@@ -103,16 +103,16 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef
; CHECK-NEXT: [[AND95:%.*]] = and i32 [[B_0278]], 1
; CHECK-NEXT: [[SHR96]] = lshr i32 [[A_0279]], 1
; CHECK-NEXT: [[SHR97]] = lshr i32 [[B_0278]], 1
-; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> poison, i32 [[AND94]], i32 0
-; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP24:%.*]] = icmp eq <2 x i32> [[TMP23]], zeroinitializer
-; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <2 x i32> [[TMP23]], zeroinitializer
-; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <2 x i1> [[TMP24]], <2 x i1> [[TMP25]], <4 x i32> <i32 0, i32 3, i32 0, i32 3>
-; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x i32> poison, i32 [[AND95]], i32 0
-; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x i32> [[TMP27]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <2 x i32> [[TMP28]], zeroinitializer
-; CHECK-NEXT: [[TMP30:%.*]] = icmp eq <2 x i32> [[TMP28]], zeroinitializer
-; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <2 x i1> [[TMP29]], <2 x i1> [[TMP30]], <4 x i32> <i32 0, i32 3, i32 3, i32 0>
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND94]], 0
+; CHECK-NEXT: [[TOBOOL98:%.*]] = icmp ne i32 [[AND95]], 0
+; CHECK-NEXT: [[TOBOOL100:%.*]] = icmp eq i32 [[AND94]], 0
+; CHECK-NEXT: [[TOBOOL103:%.*]] = icmp eq i32 [[AND95]], 0
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL100]], i32 0
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i1> [[TMP22]], i1 [[TOBOOL]], i32 1
+; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL98]], i32 0
+; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i1> [[TMP25]], i1 [[TOBOOL103]], i32 1
+; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i1> [[TMP27]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP26]], <4 x i1> [[TMP31]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i1> [[TMP32]] to <4 x i32>
; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP21]], [[TMP33]]
@@ -148,16 +148,16 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef
; CHECK-NEXT: [[AND134:%.*]] = and i32 [[B_1300]], 1
; CHECK-NEXT: [[SHR135]] = lshr i32 [[A_1301]], 1
; CHECK-NEXT: [[SHR136]] = lshr i32 [[B_1300]], 1
-; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i32> poison, i32 [[AND133]], i32 0
-; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <2 x i32> [[TMP39]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP41:%.*]] = icmp eq <2 x i32> [[TMP40]], zeroinitializer
-; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <2 x i32> [[TMP40]], zeroinitializer
-; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <2 x i1> [[TMP41]], <2 x i1> [[TMP42]], <4 x i32> <i32 0, i32 3, i32 0, i32 3>
-; CHECK-NEXT: [[TMP44:%.*]] = insertelement <2 x i32> poison, i32 [[AND134]], i32 0
-; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <2 x i32> [[TMP44]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <2 x i32> [[TMP45]], zeroinitializer
-; CHECK-NEXT: [[TMP47:%.*]] = icmp eq <2 x i32> [[TMP45]], zeroinitializer
-; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <2 x i1> [[TMP46]], <2 x i1> [[TMP47]], <4 x i32> <i32 0, i32 3, i32 3, i32 0>
+; CHECK-NEXT: [[TOBOOL137:%.*]] = icmp ne i32 [[AND133]], 0
+; CHECK-NEXT: [[TOBOOL139:%.*]] = icmp ne i32 [[AND134]], 0
+; CHECK-NEXT: [[TOBOOL144:%.*]] = icmp eq i32 [[AND133]], 0
+; CHECK-NEXT: [[TOBOOL147:%.*]] = icmp eq i32 [[AND134]], 0
+; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL144]], i32 0
+; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i1> [[TMP40]], i1 [[TOBOOL137]], i32 1
+; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i1> [[TMP41]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL139]], i32 0
+; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP42]], i1 [[TOBOOL147]], i32 1
+; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <4 x i1> [[TMP39]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
; CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP43]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP50:%.*]] = zext <4 x i1> [[TMP49]] to <4 x i32>
; CHECK-NEXT: [[TMP51]] = add <4 x i32> [[TMP38]], [[TMP50]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
index 78fc3a60f05142..3c3dea3f1ea886 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
@@ -4,14 +4,12 @@
define void @test(i16 %0) {
; CHECK-LABEL: @test(
; CHECK-NEXT: for.body92.preheader:
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> <i16 0, i16 poison>, i16 [[TMP0:%.*]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32>
-; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>, <4 x i32> [[TMP5]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
; CHECK-NEXT: br label [[FOR_BODY92:%.*]]
; CHECK: for.body92:
+; CHECK-NEXT: [[CONV177_I:%.*]] = sext i16 0 to i32
+; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0:%.*]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>, i32 [[CONV177_I]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP1]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP6]]
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr undef, align 8
; CHECK-NEXT: br label [[FOR_BODY92]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
index 16ede231c200ec..19a8aa9b618156 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
@@ -6,21 +6,19 @@ define i64 @foo() {
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
+; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ADD:%.*]], [[BB3]] ]
+; CHECK-NEXT: [[PHI2:%.*]] = phi i64 [ [[TMP9:%.*]], [[BB3]] ]
; CHECK-NEXT: ret i64 0
; CHECK: bb3:
; CHECK-NEXT: [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
-; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; CHECK-NEXT: [[ADD]] = add i64 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 0
+; CHECK-NEXT: [[TMP9]] = or i64 [[PHI5]], 0
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP7]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[ADD]], i32 0
; CHECK-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]]
;
bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
index 3a9eca2bf2e6b6..59cd1c0ccddf8c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
@@ -4,22 +4,22 @@
define void @foo() {
; CHECK-LABEL: define void @foo() {
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]]
-; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]]
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[TMP2]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SHL]], i32 0
; CHECK-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
; CHECK-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
; CHECK-NEXT: br label [[BB4]]
; CHECK: bb4:
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]]
; CHECK: bb5:
-; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB4]] ]
+; CHECK-NEXT: [[PHI6:%.*]] = phi i32 [ [[SHL]], [[BB4]] ]
+; CHECK-NEXT: [[PHI7:%.*]] = phi i32 [ [[TMP8]], [[BB4]] ]
; CHECK-NEXT: ret void
;
bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
index 09b3d25fd6dc03..65560422da0b74 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
@@ -111,11 +111,10 @@ define void @addsub_and_external_users(ptr %A, ptr %ptr) {
; CHECK-LABEL: @addsub_and_external_users(
; CHECK-NEXT: bb1:
; CHECK-NEXT: [[LD:%.*]] = load double, ptr undef, align 8
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[SHUFFLE]], <double 1.100000e+00, double 1.200000e+00>
-; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[SHUFFLE]], <double 1.100000e+00, double 1.200000e+00>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[SUB1:%.*]] = fsub double [[LD]], 1.100000e+00
+; CHECK-NEXT: [[ADD2:%.*]] = fadd double [[LD]], 1.200000e+00
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[SUB1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP0]], double [[ADD2]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], <double 2.100000e+00, double 2.200000e+00>
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 3.100000e+00, double 3.200000e+00>
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
@@ -158,11 +157,10 @@ define void @subadd_and_external_users(ptr %A, ptr %ptr) {
; CHECK-LABEL: @subadd_and_external_users(
; CHECK-NEXT: bb1:
; CHECK-NEXT: [[LD:%.*]] = load double, ptr undef, align 8
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[SHUFFLE]], <double 1.200000e+00, double 1.100000e+00>
-; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[SHUFFLE]], <double 1.200000e+00, double 1.100000e+00>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[LD]], 1.100000e+00
+; CHECK-NEXT: [[SUB2:%.*]] = fsub double [[LD]], 1.200000e+00
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[SUB2]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP0]], double [[ADD1]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], <double 2.200000e+00, double 2.100000e+00>
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 3.200000e+00, double 3.100000e+00>
; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[A:%.*]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
index 17f9f371ff6ef9..813e94ab83adcc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
@@ -18,12 +18,10 @@ define void @foo() {
; CHECK: bb4:
; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP2]] to <4 x double>
; CHECK-NEXT: [[CONV2:%.*]] = uitofp i16 undef to double
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP3]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[CONV2]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[TMP3]], [[CONV2]]
+; CHECK-NEXT: [[SUB1:%.*]] = fsub double undef, undef
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> <double poison, double poison, double undef, double undef>, double [[SUB1]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x double> [[TMP5]], double [[ADD1]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]]
; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float>
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]]
diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
index c6e2cf5543e12e..287b623f63690a 100644
--- a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
@@ -33,11 +33,10 @@ define <2 x float> @replace_through_casts(i16 %inp) {
; CHECK-LABEL: define <2 x float> @replace_through_casts(
; CHECK-SAME: i16 [[INP:%.*]]) {
; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> [[TMP1]], i16 [[ADD]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = uitofp <2 x i16> [[TMP2]] to <2 x float>
-; CHECK-NEXT: [[TMP4:%.*]] = sitofp <2 x i16> [[TMP2]] to <2 x float>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[INP]] to float
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP2]], i64 1
; CHECK-NEXT: ret <2 x float> [[R]]
;
%add = add nsw i16 %inp, -10
@@ -118,11 +117,10 @@ define <2 x i32> @replace_through_int_casts(i16 %inp, <2 x i16> %dead) {
; CHECK-LABEL: define <2 x i32> @replace_through_int_casts(
; CHECK-SAME: i16 [[INP:%.*]], <2 x i16> [[DEAD:%.*]]) {
; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> [[TMP1]], i16 [[ADD]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i32>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[INP]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = sext i16 [[ADD]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i64 1
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%add = add nsw i16 %inp, -10
@@ -136,11 +134,10 @@ define <2 x i32> @replace_through_int_casts(i16 %inp, <2 x i16> %dead) {
define <2 x i32> @replace_through_int_casts_ele0_only(i16 %inp, <2 x i16> %dead) {
; CHECK-LABEL: define <2 x i32> @replace_through_int_casts_ele0_only(
; CHECK-SAME: i16 [[INP:%.*]], <2 x i16> [[DEAD:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i32>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[INP]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[INP]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i64 1
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%2 = sext i16 %inp to i32
@@ -174,11 +171,10 @@ define <2 x i8> @replace_through_binop_preserve_flags(i8 %inp, <2 x i8> %d, <2 x
; CHECK-LABEL: define <2 x i8> @replace_through_binop_preserve_flags(
; CHECK-SAME: i8 [[INP:%.*]], <2 x i8> [[D:%.*]], <2 x i8> [[ANY:%.*]]) {
; CHECK-NEXT: [[ADD:%.*]] = xor i8 [[INP]], 5
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[ADD]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i8> [[TMP2]], <i8 123, i8 1>
-; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i8> [[TMP2]], <i8 123, i8 1>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[INP]], 123
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw i8 [[ADD]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i8> poison, i8 [[TMP1]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i8> [[TMP3]], i8 [[TMP2]], i64 1
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%add = xor i8 %inp, 5
More information about the llvm-commits
mailing list