[llvm] [SLP]Improve reordering of the alternate nodes (PR #136329)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 18 10:35:33 PDT 2025
https://github.com/alexey-bataev created https://github.com/llvm/llvm-project/pull/136329
Better to preserve the original order of the alternate nodes to avoid
inter-lane shuffling, select/insert subvector patterns provide better
perf.
>From 91aa3f2561be48df3c28ea5a80466e5ecccfe4c0 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Fri, 18 Apr 2025 17:35:23 +0000
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
=?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 45 +++++++++++++----
.../PhaseOrdering/AArch64/slpordering.ll | 50 +++++++++----------
.../SLPVectorizer/AArch64/loadorder.ll | 14 +++---
.../AArch64/transpose-inseltpoison.ll | 11 ++--
.../SLPVectorizer/AArch64/transpose.ll | 11 ++--
.../SLPVectorizer/RISCV/complex-loads.ll | 50 ++++++++++---------
.../vectorize-reorder-alt-shuffle.ll | 5 +-
7 files changed, 107 insertions(+), 79 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d496989cd0581..70c1b8d846212 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6572,6 +6572,12 @@ static bool areTwoInsertFromSameBuildVector(
return false;
}
+/// Checks if the specified instruction \p I is an alternate operation for
+/// the given \p MainOp and \p AltOp instructions.
+static bool isAlternateInstruction(Instruction *I, Instruction *MainOp,
+ Instruction *AltOp,
+ const TargetLibraryInfo &TLI);
+
std::optional<BoUpSLP::OrdersType>
BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom,
bool IgnoreReorder) {
@@ -6714,6 +6720,28 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom,
"BinaryOperator and CastInst.");
return TE.ReorderIndices;
}
+ if (!TopToBottom && IgnoreReorder && TE.State == TreeEntry::Vectorize &&
+ TE.isAltShuffle()) {
+ assert(TE.ReuseShuffleIndices.empty() &&
+ "ReuseShuffleIndices should be "
+ "empty for alternate instructions.");
+ SmallVector<int> Mask;
+ TE.buildAltOpShuffleMask(
+ [&](Instruction *I) {
+ assert(TE.getMatchingMainOpOrAltOp(I) &&
+ "Unexpected main/alternate opcode");
+ return isAlternateInstruction(I, TE.getMainOp(), TE.getAltOp(), *TLI);
+ },
+ Mask);
+ const int VF = TE.getVectorFactor();
+ OrdersType ResOrder(VF, VF);
+ for (unsigned I : seq<unsigned>(VF)) {
+ if (Mask[I] == PoisonMaskElem)
+ continue;
+ ResOrder[Mask[I] % VF] = I;
+ }
+ return std::move(ResOrder);
+ }
if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
if (!TE.ReorderIndices.empty())
return TE.ReorderIndices;
@@ -7782,13 +7810,18 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
}
// Reorder operands of the user node and set the ordering for the user
// node itself.
+ auto IsNotProfitableAltCodeNode = [](const TreeEntry &TE) {
+ return TE.isAltShuffle() &&
+ (!TE.ReuseShuffleIndices.empty() || TE.getVectorFactor() == 2 ||
+ TE.ReorderIndices.empty());
+ };
if (Data.first->State != TreeEntry::Vectorize ||
!isa<ExtractElementInst, ExtractValueInst, LoadInst>(
Data.first->getMainOp()) ||
- Data.first->isAltShuffle())
+ IsNotProfitableAltCodeNode(*Data.first))
Data.first->reorderOperands(Mask);
if (!isa<InsertElementInst, StoreInst>(Data.first->getMainOp()) ||
- Data.first->isAltShuffle() ||
+ IsNotProfitableAltCodeNode(*Data.first) ||
Data.first->State == TreeEntry::StridedVectorize ||
Data.first->State == TreeEntry::CompressVectorize) {
reorderScalars(Data.first->Scalars, Mask);
@@ -7796,7 +7829,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
/*BottomOrder=*/true);
if (Data.first->ReuseShuffleIndices.empty() &&
!Data.first->ReorderIndices.empty() &&
- !Data.first->isAltShuffle()) {
+ !IsNotProfitableAltCodeNode(*Data.first)) {
// Insert user node to the list to try to sink reordering deeper in
// the graph.
Queue.push(Data.first);
@@ -8773,12 +8806,6 @@ static std::pair<size_t, size_t> generateKeySubkey(
static bool isMainInstruction(Instruction *I, Instruction *MainOp,
Instruction *AltOp, const TargetLibraryInfo &TLI);
-/// Checks if the specified instruction \p I is an alternate operation for
-/// the given \p MainOp and \p AltOp instructions.
-static bool isAlternateInstruction(Instruction *I, Instruction *MainOp,
- Instruction *AltOp,
- const TargetLibraryInfo &TLI);
-
bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
ArrayRef<Value *> VL) const {
Type *ScalarTy = S.getMainOp()->getType();
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
index a2019836098e8..26573a3e613da 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
@@ -80,33 +80,29 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32
; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP48:%.*]] = add nsw <16 x i32> [[TMP45]], [[TMP47]]
; CHECK-NEXT: [[TMP49:%.*]] = sub nsw <16 x i32> [[TMP44]], [[TMP46]]
-; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP54:%.*]] = add nsw <16 x i32> [[TMP51]], [[TMP53]]
-; CHECK-NEXT: [[TMP55:%.*]] = sub nsw <16 x i32> [[TMP50]], [[TMP52]]
-; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP60:%.*]] = sub nsw <16 x i32> [[TMP57]], [[TMP59]]
-; CHECK-NEXT: [[TMP61:%.*]] = add nsw <16 x i32> [[TMP56]], [[TMP58]]
-; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP65]]
-; CHECK-NEXT: [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP62]], [[TMP64]]
-; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT: [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], splat (i32 15)
-; CHECK-NEXT: [[TMP70:%.*]] = and <16 x i32> [[TMP69]], splat (i32 65537)
-; CHECK-NEXT: [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], splat (i32 65535)
-; CHECK-NEXT: [[TMP72:%.*]] = add <16 x i32> [[TMP71]], [[TMP68]]
-; CHECK-NEXT: [[TMP73:%.*]] = xor <16 x i32> [[TMP72]], [[TMP71]]
-; CHECK-NEXT: [[TMP74:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP73]])
-; CHECK-NEXT: [[CONV118:%.*]] = and i32 [[TMP74]], 65535
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP74]], 16
+; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 3, i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 7>
+; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 17, i32 1, i32 16, i32 0, i32 19, i32 3, i32 18, i32 2, i32 21, i32 5, i32 20, i32 4, i32 23, i32 7, i32 22, i32 6>
+; CHECK-NEXT: [[TMP52:%.*]] = add nsw <16 x i32> [[TMP50]], [[TMP51]]
+; CHECK-NEXT: [[TMP53:%.*]] = sub nsw <16 x i32> [[TMP50]], [[TMP51]]
+; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP52]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
+; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: [[TMP56:%.*]] = sub nsw <16 x i32> [[TMP54]], [[TMP55]]
+; CHECK-NEXT: [[TMP57:%.*]] = add nsw <16 x i32> [[TMP54]], [[TMP55]]
+; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP59]], [[TMP61]]
+; CHECK-NEXT: [[TMP63:%.*]] = sub nsw <16 x i32> [[TMP58]], [[TMP60]]
+; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP65:%.*]] = lshr <16 x i32> [[TMP64]], splat (i32 15)
+; CHECK-NEXT: [[TMP66:%.*]] = and <16 x i32> [[TMP65]], splat (i32 65537)
+; CHECK-NEXT: [[TMP67:%.*]] = mul nuw <16 x i32> [[TMP66]], splat (i32 65535)
+; CHECK-NEXT: [[TMP68:%.*]] = add <16 x i32> [[TMP67]], [[TMP64]]
+; CHECK-NEXT: [[TMP69:%.*]] = xor <16 x i32> [[TMP68]], [[TMP67]]
+; CHECK-NEXT: [[TMP70:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]])
+; CHECK-NEXT: [[CONV118:%.*]] = and i32 [[TMP70]], 65535
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP70]], 16
; CHECK-NEXT: [[RDD119:%.*]] = add nuw nsw i32 [[CONV118]], [[SHR]]
; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[RDD119]], 1
; CHECK-NEXT: ret i32 [[SHR120]]
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 5ad676537f9c4..f138065101c4b 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -1259,19 +1259,19 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1,
; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; CHECK-NEXT: [[TMP45:%.*]] = add nsw <16 x i32> [[TMP43]], [[TMP44]]
; CHECK-NEXT: [[TMP46:%.*]] = sub nsw <16 x i32> [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP45]], <16 x i32> [[TMP46]], <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
-; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP46]], <16 x i32> [[TMP45]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
; CHECK-NEXT: [[TMP49:%.*]] = add nsw <16 x i32> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP50:%.*]] = sub nsw <16 x i32> [[TMP47]], [[TMP48]]
-; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP50]], <16 x i32> [[TMP49]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
+; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[TMP53:%.*]] = sub nsw <16 x i32> [[TMP51]], [[TMP52]]
; CHECK-NEXT: [[TMP54:%.*]] = add nsw <16 x i32> [[TMP51]], [[TMP52]]
-; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 4, i32 21, i32 22, i32 7, i32 8, i32 25, i32 26, i32 11, i32 12, i32 29, i32 30, i32 15>
-; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP57:%.*]] = add nsw <16 x i32> [[TMP55]], [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = sub nsw <16 x i32> [[TMP55]], [[TMP56]]
-; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
+; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP58]], <16 x i32> [[TMP57]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: [[TMP60:%.*]] = lshr <16 x i32> [[TMP59]], splat (i32 15)
; CHECK-NEXT: [[TMP61:%.*]] = and <16 x i32> [[TMP60]], splat (i32 65537)
; CHECK-NEXT: [[TMP62:%.*]] = mul nuw <16 x i32> [[TMP61]], splat (i32 65535)
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
index ab6c7443f80e8..c84c333391350 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
@@ -62,10 +62,11 @@ define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @build_vec_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]]
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 3, i32 6>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 2, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 7, i32 2>
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]
-; CHECK-NEXT: ret <4 x i32> [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
+; CHECK-NEXT: ret <4 x i32> [[TMP6]]
;
%v0.0 = extractelement <4 x i32> %v0, i32 0
%v0.1 = extractelement <4 x i32> %v0, i32 1
@@ -196,8 +197,8 @@ define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @reduction_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]]
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 7, i32 2>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 6, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 3, i32 6>
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], splat (i32 15)
; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], splat (i32 65537)
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
index 3063d85e122d8..e4fcb1ed08be9 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
@@ -62,10 +62,11 @@ define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @build_vec_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]]
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 3, i32 6>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 2, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 7, i32 2>
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]
-; CHECK-NEXT: ret <4 x i32> [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
+; CHECK-NEXT: ret <4 x i32> [[TMP6]]
;
%v0.0 = extractelement <4 x i32> %v0, i32 0
%v0.1 = extractelement <4 x i32> %v0, i32 1
@@ -196,8 +197,8 @@ define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @reduction_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]]
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 7, i32 2>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 6, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 3, i32 6>
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], splat (i32 15)
; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], splat (i32 65537)
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
index 0f56862446a9d..15425c38bbb04 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
@@ -34,11 +34,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP14]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = sub <4 x i32> [[TMP14]], [[TMP13]]
-; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], <4 x i32> <i32 2, i32 7, i32 0, i32 5>
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i32> [[TMP17]], [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = sub <4 x i32> [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP19]], <4 x i32> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[TMP88:%.*]] = shufflevector <4 x i32> [[TMP19]], <4 x i32> [[TMP20]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1
; CHECK-NEXT: [[TMP23:%.*]] = zext <4 x i8> [[TMP22]] to <4 x i32>
; CHECK-NEXT: [[TMP24:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1
@@ -54,11 +54,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i32> [[TMP33]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP35:%.*]] = add <4 x i32> [[TMP34]], [[TMP33]]
; CHECK-NEXT: [[TMP36:%.*]] = sub <4 x i32> [[TMP34]], [[TMP33]]
-; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP36]], <4 x i32> <i32 2, i32 7, i32 0, i32 5>
+; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP36]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: [[TMP39:%.*]] = add <4 x i32> [[TMP37]], [[TMP38]]
; CHECK-NEXT: [[TMP40:%.*]] = sub <4 x i32> [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <4 x i32> [[TMP39]], <4 x i32> [[TMP40]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <4 x i32> [[TMP39]], <4 x i32> [[TMP40]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP42:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = zext <4 x i8> [[TMP42]] to <4 x i32>
; CHECK-NEXT: [[TMP44:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
@@ -74,11 +74,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP55:%.*]] = add <4 x i32> [[TMP54]], [[TMP53]]
; CHECK-NEXT: [[TMP56:%.*]] = sub <4 x i32> [[TMP54]], [[TMP53]]
-; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <4 x i32> [[TMP55]], <4 x i32> [[TMP56]], <4 x i32> <i32 2, i32 7, i32 0, i32 5>
+; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <4 x i32> [[TMP55]], <4 x i32> [[TMP56]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: [[TMP59:%.*]] = add <4 x i32> [[TMP57]], [[TMP58]]
; CHECK-NEXT: [[TMP60:%.*]] = sub <4 x i32> [[TMP57]], [[TMP58]]
-; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <4 x i32> [[TMP59]], <4 x i32> [[TMP60]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <4 x i32> [[TMP59]], <4 x i32> [[TMP60]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: [[TMP62:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2)
; CHECK-NEXT: [[TMP63:%.*]] = load <4 x i8>, ptr null, align 1
; CHECK-NEXT: [[TMP64:%.*]] = zext <4 x i8> [[TMP63]] to <4 x i32>
@@ -104,10 +104,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[TMP84:%.*]] = add <4 x i32> [[TMP82]], [[TMP83]]
; CHECK-NEXT: [[TMP85:%.*]] = sub <4 x i32> [[TMP82]], [[TMP83]]
; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <4 x i32> [[TMP84]], <4 x i32> [[TMP85]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <4 x i32> [[TMP89]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP88]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: [[TMP87:%.*]] = add <4 x i32> [[TMP41]], [[TMP21]]
-; CHECK-NEXT: [[TMP88:%.*]] = sub <4 x i32> [[TMP21]], [[TMP41]]
-; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <4 x i32> [[TMP88]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP90:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP89]], <4 x i32> [[TMP87]], i64 4)
+; CHECK-NEXT: [[TMP106:%.*]] = sub <4 x i32> [[TMP88]], [[TMP89]]
+; CHECK-NEXT: [[TMP90:%.*]] = shufflevector <4 x i32> [[TMP106]], <4 x i32> [[TMP87]], <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP91:%.*]] = add <4 x i32> [[TMP86]], [[TMP61]]
; CHECK-NEXT: [[TMP92:%.*]] = sub <4 x i32> [[TMP61]], [[TMP86]]
; CHECK-NEXT: [[TMP93:%.*]] = shufflevector <4 x i32> [[TMP92]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -115,7 +116,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[TMP95:%.*]] = add <8 x i32> [[TMP94]], [[TMP90]]
; CHECK-NEXT: [[TMP96:%.*]] = sub <8 x i32> [[TMP90]], [[TMP94]]
; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <8 x i32> [[TMP95]], <8 x i32> [[TMP96]], <16 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 12, i32 8, i32 13, i32 9, i32 14, i32 10, i32 15, i32 11>
-; CHECK-NEXT: [[TMP98:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> [[TMP64]], <16 x i32> <i32 0, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP98:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> [[TMP64]], <16 x i32> <i32 2, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP99:%.*]] = shufflevector <4 x i32> [[TMP43]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP100:%.*]] = shufflevector <16 x i32> [[TMP98]], <16 x i32> [[TMP99]], <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP101:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -123,9 +124,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[TMP103:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP104:%.*]] = shufflevector <16 x i32> [[TMP102]], <16 x i32> [[TMP103]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 poison, i32 18, i32 19>
; CHECK-NEXT: [[TMP105:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP106:%.*]] = shufflevector <16 x i32> [[TMP104]], <16 x i32> [[TMP105]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 poison, i32 17, i32 poison, i32 12, i32 poison, i32 14, i32 15>
+; CHECK-NEXT: [[TMP116:%.*]] = shufflevector <16 x i32> [[TMP104]], <16 x i32> [[TMP105]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 18, i32 poison, i32 19, i32 poison, i32 12, i32 poison, i32 14, i32 15>
; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP108:%.*]] = shufflevector <16 x i32> [[TMP106]], <16 x i32> [[TMP107]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 10, i32 17, i32 12, i32 18, i32 14, i32 15>
+; CHECK-NEXT: [[TMP108:%.*]] = shufflevector <16 x i32> [[TMP116]], <16 x i32> [[TMP107]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 18, i32 10, i32 19, i32 12, i32 16, i32 14, i32 15>
; CHECK-NEXT: [[TMP109:%.*]] = lshr <16 x i32> [[TMP108]], splat (i32 15)
; CHECK-NEXT: [[TMP110:%.*]] = and <16 x i32> [[TMP109]], splat (i32 65537)
; CHECK-NEXT: [[TMP111:%.*]] = mul <16 x i32> [[TMP110]], splat (i32 65535)
@@ -165,11 +166,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; THR15-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; THR15-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP14]], [[TMP13]]
; THR15-NEXT: [[TMP16:%.*]] = sub <4 x i32> [[TMP14]], [[TMP13]]
-; THR15-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], <4 x i32> <i32 2, i32 7, i32 0, i32 5>
+; THR15-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; THR15-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; THR15-NEXT: [[TMP19:%.*]] = add <4 x i32> [[TMP17]], [[TMP18]]
; THR15-NEXT: [[TMP20:%.*]] = sub <4 x i32> [[TMP17]], [[TMP18]]
-; THR15-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP19]], <4 x i32> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; THR15-NEXT: [[TMP88:%.*]] = shufflevector <4 x i32> [[TMP19]], <4 x i32> [[TMP20]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; THR15-NEXT: [[TMP22:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1
; THR15-NEXT: [[TMP23:%.*]] = zext <4 x i8> [[TMP22]] to <4 x i32>
; THR15-NEXT: [[TMP24:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1
@@ -185,11 +186,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; THR15-NEXT: [[TMP34:%.*]] = shufflevector <4 x i32> [[TMP33]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; THR15-NEXT: [[TMP35:%.*]] = add <4 x i32> [[TMP34]], [[TMP33]]
; THR15-NEXT: [[TMP36:%.*]] = sub <4 x i32> [[TMP34]], [[TMP33]]
-; THR15-NEXT: [[TMP37:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP36]], <4 x i32> <i32 2, i32 7, i32 0, i32 5>
+; THR15-NEXT: [[TMP37:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP36]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; THR15-NEXT: [[TMP38:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; THR15-NEXT: [[TMP39:%.*]] = add <4 x i32> [[TMP37]], [[TMP38]]
; THR15-NEXT: [[TMP40:%.*]] = sub <4 x i32> [[TMP37]], [[TMP38]]
-; THR15-NEXT: [[TMP41:%.*]] = shufflevector <4 x i32> [[TMP39]], <4 x i32> [[TMP40]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; THR15-NEXT: [[TMP89:%.*]] = shufflevector <4 x i32> [[TMP39]], <4 x i32> [[TMP40]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; THR15-NEXT: [[TMP42:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
; THR15-NEXT: [[TMP43:%.*]] = zext <4 x i8> [[TMP42]] to <4 x i32>
; THR15-NEXT: [[TMP44:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
@@ -205,11 +206,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; THR15-NEXT: [[TMP54:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; THR15-NEXT: [[TMP55:%.*]] = add <4 x i32> [[TMP54]], [[TMP53]]
; THR15-NEXT: [[TMP56:%.*]] = sub <4 x i32> [[TMP54]], [[TMP53]]
-; THR15-NEXT: [[TMP57:%.*]] = shufflevector <4 x i32> [[TMP55]], <4 x i32> [[TMP56]], <4 x i32> <i32 2, i32 7, i32 0, i32 5>
+; THR15-NEXT: [[TMP57:%.*]] = shufflevector <4 x i32> [[TMP55]], <4 x i32> [[TMP56]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; THR15-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; THR15-NEXT: [[TMP59:%.*]] = add <4 x i32> [[TMP57]], [[TMP58]]
; THR15-NEXT: [[TMP60:%.*]] = sub <4 x i32> [[TMP57]], [[TMP58]]
-; THR15-NEXT: [[TMP61:%.*]] = shufflevector <4 x i32> [[TMP59]], <4 x i32> [[TMP60]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; THR15-NEXT: [[TMP61:%.*]] = shufflevector <4 x i32> [[TMP59]], <4 x i32> [[TMP60]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; THR15-NEXT: [[TMP62:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2)
; THR15-NEXT: [[TMP63:%.*]] = load <4 x i8>, ptr null, align 1
; THR15-NEXT: [[TMP64:%.*]] = zext <4 x i8> [[TMP63]] to <4 x i32>
@@ -235,10 +236,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; THR15-NEXT: [[TMP84:%.*]] = add <4 x i32> [[TMP82]], [[TMP83]]
; THR15-NEXT: [[TMP85:%.*]] = sub <4 x i32> [[TMP82]], [[TMP83]]
; THR15-NEXT: [[TMP86:%.*]] = shufflevector <4 x i32> [[TMP84]], <4 x i32> [[TMP85]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; THR15-NEXT: [[TMP41:%.*]] = shufflevector <4 x i32> [[TMP89]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+; THR15-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP88]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; THR15-NEXT: [[TMP87:%.*]] = add <4 x i32> [[TMP41]], [[TMP21]]
-; THR15-NEXT: [[TMP88:%.*]] = sub <4 x i32> [[TMP21]], [[TMP41]]
-; THR15-NEXT: [[TMP89:%.*]] = shufflevector <4 x i32> [[TMP88]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; THR15-NEXT: [[TMP90:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP89]], <4 x i32> [[TMP87]], i64 4)
+; THR15-NEXT: [[TMP106:%.*]] = sub <4 x i32> [[TMP88]], [[TMP89]]
+; THR15-NEXT: [[TMP90:%.*]] = shufflevector <4 x i32> [[TMP106]], <4 x i32> [[TMP87]], <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
; THR15-NEXT: [[TMP91:%.*]] = add <4 x i32> [[TMP86]], [[TMP61]]
; THR15-NEXT: [[TMP92:%.*]] = sub <4 x i32> [[TMP61]], [[TMP86]]
; THR15-NEXT: [[TMP93:%.*]] = shufflevector <4 x i32> [[TMP92]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -246,7 +248,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; THR15-NEXT: [[TMP95:%.*]] = add <8 x i32> [[TMP94]], [[TMP90]]
; THR15-NEXT: [[TMP96:%.*]] = sub <8 x i32> [[TMP90]], [[TMP94]]
; THR15-NEXT: [[TMP97:%.*]] = shufflevector <8 x i32> [[TMP95]], <8 x i32> [[TMP96]], <16 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 12, i32 8, i32 13, i32 9, i32 14, i32 10, i32 15, i32 11>
-; THR15-NEXT: [[TMP98:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> [[TMP64]], <16 x i32> <i32 0, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; THR15-NEXT: [[TMP98:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> [[TMP64]], <16 x i32> <i32 2, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP99:%.*]] = shufflevector <4 x i32> [[TMP43]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP100:%.*]] = shufflevector <16 x i32> [[TMP98]], <16 x i32> [[TMP99]], <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP101:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -254,9 +256,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; THR15-NEXT: [[TMP103:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP104:%.*]] = shufflevector <16 x i32> [[TMP102]], <16 x i32> [[TMP103]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 poison, i32 18, i32 19>
; THR15-NEXT: [[TMP105:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; THR15-NEXT: [[TMP106:%.*]] = shufflevector <16 x i32> [[TMP104]], <16 x i32> [[TMP105]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 poison, i32 17, i32 poison, i32 12, i32 poison, i32 14, i32 15>
+; THR15-NEXT: [[TMP115:%.*]] = shufflevector <16 x i32> [[TMP104]], <16 x i32> [[TMP105]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 18, i32 poison, i32 19, i32 poison, i32 12, i32 poison, i32 14, i32 15>
; THR15-NEXT: [[TMP107:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; THR15-NEXT: [[TMP108:%.*]] = shufflevector <16 x i32> [[TMP106]], <16 x i32> [[TMP107]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 10, i32 17, i32 12, i32 18, i32 14, i32 15>
+; THR15-NEXT: [[TMP108:%.*]] = shufflevector <16 x i32> [[TMP115]], <16 x i32> [[TMP107]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 18, i32 10, i32 19, i32 12, i32 16, i32 14, i32 15>
; THR15-NEXT: [[TMP109:%.*]] = lshr <16 x i32> [[TMP108]], splat (i32 15)
; THR15-NEXT: [[TMP110:%.*]] = and <16 x i32> [[TMP109]], splat (i32 65537)
; THR15-NEXT: [[TMP111:%.*]] = mul <16 x i32> [[TMP110]], splat (i32 65535)
diff --git a/llvm/test/Transforms/SLPVectorizer/vectorize-reorder-alt-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/vectorize-reorder-alt-shuffle.ll
index 631adf1fa81d5..f0f8377d637f9 100644
--- a/llvm/test/Transforms/SLPVectorizer/vectorize-reorder-alt-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/vectorize-reorder-alt-shuffle.ll
@@ -11,11 +11,12 @@ define void @foo(ptr %c, ptr %d) {
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <4 x i32> [[TMP2]], <i32 2, i32 2, i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], <i32 2, i32 2, i32 2, i32 3>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 1, i32 2, i32 7, i32 0>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = sitofp <4 x i32> [[TMP6]] to <4 x float>
; CHECK-NEXT: [[TMP8:%.*]] = fdiv <4 x float> [[TMP7]], undef
-; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[ADD_PTR53]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[ADD_PTR53]], align 4
; CHECK-NEXT: ret void
;
entry:
More information about the llvm-commits
mailing list