[llvm] [VectorCombine] Fix invalid shuffle cost argument of foldShuffleOfSelects (PR #130281)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 7 07:41:01 PST 2025
https://github.com/ParkHanbum updated https://github.com/llvm/llvm-project/pull/130281
>From eb61e3137286f6761b5b455bef99cc4bb57fccb1 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 7 Mar 2025 19:22:33 +0900
Subject: [PATCH 1/2] [VectorCombine] Fix invalid shuffle cost argument of
foldShuffleOfSelects
In the previous code, it specified the destination vector as
the getShuffleCost argument. Because the shuffle mask specifies
the indices of the two vectors specified as elements, the maximum
value is twice the size of the source vector. This causes a problem
if the destination vector is smaller than the source vector and
specify an index in the mask that exceeds the size of the destination
vector.
Fix the problem by correcting the previous code, which was using
wrong argument in the Cost calculation.
Fixed https://github.com/llvm/llvm-project/issues/130250
---
.../Transforms/Vectorize/VectorCombine.cpp | 21 +-
.../PhaseOrdering/X86/blendv-select.ll | 342 +++++++++++++-----
.../AArch64/shuffletoidentity.ll | 6 +-
.../VectorCombine/X86/shuffle-of-selects.ll | 255 ++++++++-----
4 files changed, 435 insertions(+), 189 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4d4a1a6e04d32..776a733d86afc 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2037,7 +2037,6 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
m_Mask(Mask))))
return false;
- auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
auto *C1VecTy = dyn_cast<FixedVectorType>(C1->getType());
auto *C2VecTy = dyn_cast<FixedVectorType>(C2->getType());
if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
@@ -2051,24 +2050,26 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
(SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
return false;
+ auto *SrcVecTy = dyn_cast<FixedVectorType>(T1->getType());
+ auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
auto SK = TargetTransformInfo::SK_PermuteTwoSrc;
auto SelOp = Instruction::Select;
InstructionCost OldCost = TTI.getCmpSelInstrCost(
- SelOp, T1->getType(), C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
- OldCost += TTI.getCmpSelInstrCost(SelOp, T2->getType(), C2VecTy,
+ SelOp, DstVecTy, C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ OldCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, C2VecTy,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
- OldCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr,
+ OldCost += TTI.getShuffleCost(SK, SrcVecTy, Mask, CostKind, 0, nullptr,
{I.getOperand(0), I.getOperand(1)}, &I);
- auto *C1C2VecTy = cast<FixedVectorType>(
- toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
InstructionCost NewCost =
- TTI.getShuffleCost(SK, C1C2VecTy, Mask, CostKind, 0, nullptr, {C1, C2});
+ TTI.getShuffleCost(SK, C1VecTy, Mask, CostKind, 0, nullptr, {C1, C2});
NewCost +=
- TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {T1, T2});
+ TTI.getShuffleCost(SK, SrcVecTy, Mask, CostKind, 0, nullptr, {T1, T2});
NewCost +=
- TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {F1, F2});
- NewCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, DstVecTy,
+ TTI.getShuffleCost(SK, SrcVecTy, Mask, CostKind, 0, nullptr, {F1, F2});
+ auto *C1C2ShuffledVecTy = cast<FixedVectorType>(
+ toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
+ NewCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, C1C2ShuffledVecTy,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two selects: " << I
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
index c2ed7b9c84523..84edc6e90a91d 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s
-; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s
-; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX2
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
;
; PR58895 - replace shuffled _mm_blendv_epi8+icmp with select+icmp
@@ -12,10 +12,20 @@
;
define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
-; CHECK-LABEL: @x86_pblendvb_v4f64_v2f64(
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
-; CHECK-NEXT: [[DOTV:%.*]] = select <4 x i1> [[CMP]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
-; CHECK-NEXT: ret <4 x double> [[DOTV]]
+; SSE-LABEL: @x86_pblendvb_v4f64_v2f64(
+; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
+; SSE-NEXT: [[DOTV:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
+; SSE-NEXT: ret <4 x double> [[DOTV]]
+;
+; AVX2-LABEL: @x86_pblendvb_v4f64_v2f64(
+; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
+; AVX2-NEXT: [[DOTV:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
+; AVX2-NEXT: ret <4 x double> [[DOTV]]
+;
+; AVX512-LABEL: @x86_pblendvb_v4f64_v2f64(
+; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
+; AVX512-NEXT: [[DOTV:%.*]] = select <4 x i1> [[CMP]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
+; AVX512-NEXT: ret <4 x double> [[DOTV]]
;
%a.bc = bitcast <4 x double> %a to <32 x i8>
%b.bc = bitcast <4 x double> %b to <32 x i8>
@@ -36,10 +46,20 @@ define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b,
}
define <8 x float> @x86_pblendvb_v8f32_v4f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
-; CHECK-LABEL: @x86_pblendvb_v8f32_v4f32(
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
-; CHECK-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
-; CHECK-NEXT: ret <8 x float> [[DOTV]]
+; SSE-LABEL: @x86_pblendvb_v8f32_v4f32(
+; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
+; SSE-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
+; SSE-NEXT: ret <8 x float> [[DOTV]]
+;
+; AVX2-LABEL: @x86_pblendvb_v8f32_v4f32(
+; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
+; AVX2-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
+; AVX2-NEXT: ret <8 x float> [[DOTV]]
+;
+; AVX512-LABEL: @x86_pblendvb_v8f32_v4f32(
+; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
+; AVX512-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
+; AVX512-NEXT: ret <8 x float> [[DOTV]]
;
%a.bc = bitcast <8 x float> %a to <32 x i8>
%b.bc = bitcast <8 x float> %b to <32 x i8>
@@ -60,10 +80,20 @@ define <8 x float> @x86_pblendvb_v8f32_v4f32(<8 x float> %a, <8 x float> %b, <8
}
define <4 x i64> @x86_pblendvb_v4i64_v2i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v4i64_v2i64(
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]]
-; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+; SSE-LABEL: @x86_pblendvb_v4i64_v2i64(
+; SSE-NEXT: [[TMP1:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]]
+; SSE-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]]
+; SSE-NEXT: ret <4 x i64> [[TMP2]]
+;
+; AVX2-LABEL: @x86_pblendvb_v4i64_v2i64(
+; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]]
+; AVX2-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]]
+; AVX2-NEXT: ret <4 x i64> [[TMP2]]
+;
+; AVX512-LABEL: @x86_pblendvb_v4i64_v2i64(
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]]
+; AVX512-NEXT: [[TMP1:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]]
+; AVX512-NEXT: ret <4 x i64> [[TMP1]]
;
%a.bc = bitcast <4 x i64> %a to <32 x i8>
%b.bc = bitcast <4 x i64> %b to <32 x i8>
@@ -84,15 +114,35 @@ define <4 x i64> @x86_pblendvb_v4i64_v2i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>
}
define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v8i32_v4i32(
-; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32>
-; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[TMP2]], <8 x i32> [[TMP1]]
-; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP3]] to <4 x i64>
-; CHECK-NEXT: ret <4 x i64> [[RES]]
+; SSE-LABEL: @x86_pblendvb_v8i32_v4i32(
+; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32>
+; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32>
+; SSE-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
+; SSE-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
+; SSE-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]]
+; SSE-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP4]] to <4 x i64>
+; SSE-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX2-LABEL: @x86_pblendvb_v8i32_v4i32(
+; AVX2-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32>
+; AVX2-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32>
+; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
+; AVX2-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
+; AVX2-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
+; AVX2-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]]
+; AVX2-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP4]] to <4 x i64>
+; AVX2-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX512-LABEL: @x86_pblendvb_v8i32_v4i32(
+; AVX512-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32>
+; AVX512-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32>
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
+; AVX512-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
+; AVX512-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
+; AVX512-NEXT: [[TMP3:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[TMP2]], <8 x i32> [[TMP1]]
+; AVX512-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP3]] to <4 x i64>
+; AVX512-NEXT: ret <4 x i64> [[RES]]
;
%a.bc = bitcast <4 x i64> %a to <32 x i8>
%b.bc = bitcast <4 x i64> %b to <32 x i8>
@@ -115,15 +165,35 @@ define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64>
}
define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v16i16_v8i16(
-; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16>
-; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
-; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[TMP2]], <16 x i16> [[TMP1]]
-; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP3]] to <4 x i64>
-; CHECK-NEXT: ret <4 x i64> [[RES]]
+; SSE-LABEL: @x86_pblendvb_v16i16_v8i16(
+; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16>
+; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16>
+; SSE-NEXT: [[TMP1:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
+; SSE-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
+; SSE-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]]
+; SSE-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP4]] to <4 x i64>
+; SSE-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX2-LABEL: @x86_pblendvb_v16i16_v8i16(
+; AVX2-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16>
+; AVX2-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16>
+; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
+; AVX2-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
+; AVX2-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
+; AVX2-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]]
+; AVX2-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP4]] to <4 x i64>
+; AVX2-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX512-LABEL: @x86_pblendvb_v16i16_v8i16(
+; AVX512-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16>
+; AVX512-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16>
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
+; AVX512-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
+; AVX512-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
+; AVX512-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[TMP2]], <16 x i16> [[TMP1]]
+; AVX512-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP3]] to <4 x i64>
+; AVX512-NEXT: ret <4 x i64> [[RES]]
;
%a.bc = bitcast <4 x i64> %a to <32 x i8>
%b.bc = bitcast <4 x i64> %b to <32 x i8>
@@ -146,15 +216,35 @@ define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64
}
define <4 x i64> @x86_pblendvb_v32i8_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v32i8_v16i8(
-; CHECK-NEXT: [[A_BC:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
-; CHECK-NEXT: [[B_BC:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>
-; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8>
-; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[B_BC]], <32 x i8> [[A_BC]]
-; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64>
-; CHECK-NEXT: ret <4 x i64> [[RES]]
+; SSE-LABEL: @x86_pblendvb_v32i8_v16i8(
+; SSE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
+; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>
+; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8>
+; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8>
+; SSE-NEXT: [[TMP3:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[TMP3]], <32 x i8> [[TMP2]], <32 x i8> [[TMP1]]
+; SSE-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64>
+; SSE-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX2-LABEL: @x86_pblendvb_v32i8_v16i8(
+; AVX2-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
+; AVX2-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>
+; AVX2-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8>
+; AVX2-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8>
+; AVX2-NEXT: [[TMP3:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
+; AVX2-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[TMP3]], <32 x i8> [[TMP2]], <32 x i8> [[TMP1]]
+; AVX2-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64>
+; AVX2-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX512-LABEL: @x86_pblendvb_v32i8_v16i8(
+; AVX512-NEXT: [[A_BC:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
+; AVX512-NEXT: [[B_BC:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>
+; AVX512-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8>
+; AVX512-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8>
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
+; AVX512-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[B_BC]], <32 x i8> [[A_BC]]
+; AVX512-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64>
+; AVX512-NEXT: ret <4 x i64> [[RES]]
;
%a.bc = bitcast <4 x i64> %a to <32 x i8>
%b.bc = bitcast <4 x i64> %b to <32 x i8>
@@ -180,10 +270,20 @@ define <4 x i64> @x86_pblendvb_v32i8_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64>
;
define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
-; CHECK-LABEL: @x86_pblendvb_v8f64_v4f64(
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
-; CHECK-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
-; CHECK-NEXT: ret <8 x double> [[DOTV]]
+; SSE-LABEL: @x86_pblendvb_v8f64_v4f64(
+; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
+; SSE-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
+; SSE-NEXT: ret <8 x double> [[DOTV]]
+;
+; AVX2-LABEL: @x86_pblendvb_v8f64_v4f64(
+; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
+; AVX2-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
+; AVX2-NEXT: ret <8 x double> [[DOTV]]
+;
+; AVX512-LABEL: @x86_pblendvb_v8f64_v4f64(
+; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
+; AVX512-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
+; AVX512-NEXT: ret <8 x double> [[DOTV]]
;
%a.bc = bitcast <8 x double> %a to <64 x i8>
%b.bc = bitcast <8 x double> %b to <64 x i8>
@@ -204,10 +304,20 @@ define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b,
}
define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
-; CHECK-LABEL: @x86_pblendvb_v16f32_v8f32(
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
-; CHECK-NEXT: [[DOTV:%.*]] = select <16 x i1> [[CMP]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
-; CHECK-NEXT: ret <16 x float> [[DOTV]]
+; SSE-LABEL: @x86_pblendvb_v16f32_v8f32(
+; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
+; SSE-NEXT: [[DOTV:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
+; SSE-NEXT: ret <16 x float> [[DOTV]]
+;
+; AVX2-LABEL: @x86_pblendvb_v16f32_v8f32(
+; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
+; AVX2-NEXT: [[DOTV:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
+; AVX2-NEXT: ret <16 x float> [[DOTV]]
+;
+; AVX512-LABEL: @x86_pblendvb_v16f32_v8f32(
+; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
+; AVX512-NEXT: [[DOTV:%.*]] = select <16 x i1> [[CMP]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
+; AVX512-NEXT: ret <16 x float> [[DOTV]]
;
%a.bc = bitcast <16 x float> %a to <64 x i8>
%b.bc = bitcast <16 x float> %b to <64 x i8>
@@ -228,10 +338,20 @@ define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b,
}
define <8 x i64> @x86_pblendvb_v8i64_v4i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v8i64_v4i64(
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]]
-; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+; SSE-LABEL: @x86_pblendvb_v8i64_v4i64(
+; SSE-NEXT: [[TMP1:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]]
+; SSE-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]]
+; SSE-NEXT: ret <8 x i64> [[TMP2]]
+;
+; AVX2-LABEL: @x86_pblendvb_v8i64_v4i64(
+; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]]
+; AVX2-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]]
+; AVX2-NEXT: ret <8 x i64> [[TMP2]]
+;
+; AVX512-LABEL: @x86_pblendvb_v8i64_v4i64(
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]]
+; AVX512-NEXT: [[TMP1:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]]
+; AVX512-NEXT: ret <8 x i64> [[TMP1]]
;
%a.bc = bitcast <8 x i64> %a to <64 x i8>
%b.bc = bitcast <8 x i64> %b to <64 x i8>
@@ -252,15 +372,35 @@ define <8 x i64> @x86_pblendvb_v8i64_v4i64(<8 x i64> %a, <8 x i64> %b, <8 x i64>
}
define <8 x i64> @x86_pblendvb_v16i32_v8i32(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v16i32_v8i32(
-; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
-; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[TMP2]], <16 x i32> [[TMP1]]
-; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP3]] to <8 x i64>
-; CHECK-NEXT: ret <8 x i64> [[RES]]
+; SSE-LABEL: @x86_pblendvb_v16i32_v8i32(
+; SSE-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
+; SSE-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
+; SSE-NEXT: [[TMP1:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
+; SSE-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
+; SSE-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
+; SSE-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP4]] to <8 x i64>
+; SSE-NEXT: ret <8 x i64> [[RES]]
+;
+; AVX2-LABEL: @x86_pblendvb_v16i32_v8i32(
+; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
+; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
+; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
+; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
+; AVX2-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
+; AVX2-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
+; AVX2-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP4]] to <8 x i64>
+; AVX2-NEXT: ret <8 x i64> [[RES]]
+;
+; AVX512-LABEL: @x86_pblendvb_v16i32_v8i32(
+; AVX512-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
+; AVX512-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
+; AVX512-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
+; AVX512-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
+; AVX512-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[TMP2]], <16 x i32> [[TMP1]]
+; AVX512-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP3]] to <8 x i64>
+; AVX512-NEXT: ret <8 x i64> [[RES]]
;
%a.bc = bitcast <8 x i64> %a to <64 x i8>
%b.bc = bitcast <8 x i64> %b to <64 x i8>
@@ -283,15 +423,35 @@ define <8 x i64> @x86_pblendvb_v16i32_v8i32(<8 x i64> %a, <8 x i64> %b, <8 x i64
}
define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v32i16_v16i16(
-; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16>
-; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
-; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[TMP2]], <32 x i16> [[TMP1]]
-; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP3]] to <8 x i64>
-; CHECK-NEXT: ret <8 x i64> [[RES]]
+; SSE-LABEL: @x86_pblendvb_v32i16_v16i16(
+; SSE-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16>
+; SSE-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16>
+; SSE-NEXT: [[TMP1:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
+; SSE-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
+; SSE-NEXT: [[TMP4:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[TMP2]], <32 x i16> [[TMP3]]
+; SSE-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP4]] to <8 x i64>
+; SSE-NEXT: ret <8 x i64> [[RES]]
+;
+; AVX2-LABEL: @x86_pblendvb_v32i16_v16i16(
+; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16>
+; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16>
+; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
+; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
+; AVX2-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
+; AVX2-NEXT: [[TMP4:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[TMP2]], <32 x i16> [[TMP3]]
+; AVX2-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP4]] to <8 x i64>
+; AVX2-NEXT: ret <8 x i64> [[RES]]
+;
+; AVX512-LABEL: @x86_pblendvb_v32i16_v16i16(
+; AVX512-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16>
+; AVX512-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16>
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
+; AVX512-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
+; AVX512-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
+; AVX512-NEXT: [[TMP3:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[TMP2]], <32 x i16> [[TMP1]]
+; AVX512-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP3]] to <8 x i64>
+; AVX512-NEXT: ret <8 x i64> [[RES]]
;
%a.bc = bitcast <8 x i64> %a to <64 x i8>
%b.bc = bitcast <8 x i64> %b to <64 x i8>
@@ -314,15 +474,35 @@ define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i6
}
define <8 x i64> @x86_pblendvb_v64i8_v32i8(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v64i8_v32i8(
-; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8>
-; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8>
-; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8>
-; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[B_BC]], <64 x i8> [[A_BC]]
-; CHECK-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64>
-; CHECK-NEXT: ret <8 x i64> [[RES]]
+; SSE-LABEL: @x86_pblendvb_v64i8_v32i8(
+; SSE-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8>
+; SSE-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8>
+; SSE-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8>
+; SSE-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8>
+; SSE-NEXT: [[TMP3:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[TMP3]], <64 x i8> [[TMP2]], <64 x i8> [[TMP1]]
+; SSE-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64>
+; SSE-NEXT: ret <8 x i64> [[RES]]
+;
+; AVX2-LABEL: @x86_pblendvb_v64i8_v32i8(
+; AVX2-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8>
+; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8>
+; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8>
+; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8>
+; AVX2-NEXT: [[TMP3:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]]
+; AVX2-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[TMP3]], <64 x i8> [[TMP2]], <64 x i8> [[TMP1]]
+; AVX2-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64>
+; AVX2-NEXT: ret <8 x i64> [[RES]]
+;
+; AVX512-LABEL: @x86_pblendvb_v64i8_v32i8(
+; AVX512-NEXT: [[A_BC:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8>
+; AVX512-NEXT: [[B_BC:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8>
+; AVX512-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8>
+; AVX512-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8>
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]]
+; AVX512-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[B_BC]], <64 x i8> [[A_BC]]
+; AVX512-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64>
+; AVX512-NEXT: ret <8 x i64> [[RES]]
;
%a.bc = bitcast <8 x i64> %a to <64 x i8>
%b.bc = bitcast <8 x i64> %b to <64 x i8>
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index 09875c5e0af40..1c128c8f56a03 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -997,8 +997,10 @@ define <4 x i64> @bitcast_smax_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b) {
; CHECK-NEXT: [[A_BC0:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
; CHECK-NEXT: [[B_BC0:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[A_BC0]], [[B_BC0]]
-; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
-; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[B]], <4 x i64> [[B]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[TMP2]] to <8 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[A]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[TMP4]] to <8 x i32>
; CHECK-NEXT: [[CONCAT:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[B_BC1]], <8 x i32> [[A_BC1]]
; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[CONCAT]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[RES]]
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
index 6653bf3375423..0bc87a9b03821 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
@@ -4,28 +4,12 @@
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i16> %y, <2 x i16> %z) {
-; SSE-LABEL: define <4 x i16> @src_v2tov4_i16(
-; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]]
-; SSE-NEXT: ret <4 x i16> [[RES]]
-;
-; AVX2-LABEL: define <4 x i16> @src_v2tov4_i16(
-; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
-; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]]
-; AVX2-NEXT: ret <4 x i16> [[RES]]
-;
-; AVX512-LABEL: define <4 x i16> @src_v2tov4_i16(
-; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
-; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]]
-; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]]
-; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX512-NEXT: ret <4 x i16> [[RES]]
+; CHECK-LABEL: define <4 x i16> @src_v2tov4_i16(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i16> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x i16> %x, <2 x i16> %z
%select.yx = select <2 x i1> %b, <2 x i16> %y, <2 x i16> %x
@@ -34,28 +18,12 @@ define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i1
}
define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i16> %y, <4 x i16> %z) {
-; SSE-LABEL: define <8 x i16> @src_v4tov8_i16(
-; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
-; SSE-NEXT: ret <8 x i16> [[RES]]
-;
-; AVX2-LABEL: define <8 x i16> @src_v4tov8_i16(
-; AVX2-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
-; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
-; AVX2-NEXT: ret <8 x i16> [[RES]]
-;
-; AVX512-LABEL: define <8 x i16> @src_v4tov8_i16(
-; AVX512-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
-; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]]
-; AVX512-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]]
-; AVX512-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; AVX512-NEXT: ret <8 x i16> [[RES]]
+; CHECK-LABEL: define <8 x i16> @src_v4tov8_i16(
+; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x i16> [[RES]]
;
%select.xz = select <4 x i1> %a, <4 x i16> %x, <4 x i16> %z
%select.yx = select <4 x i1> %b, <4 x i16> %y, <4 x i16> %x
@@ -94,12 +62,28 @@ define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i1
}
define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
-; CHECK-LABEL: define <16 x i16> @src_v8tov16_i16(
-; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: ret <16 x i16> [[RES]]
+; SSE-LABEL: define <16 x i16> @src_v8tov16_i16(
+; SSE-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE-NEXT: [[RES:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]]
+; SSE-NEXT: ret <16 x i16> [[RES]]
+;
+; AVX2-LABEL: define <16 x i16> @src_v8tov16_i16(
+; AVX2-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: [[RES:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]]
+; AVX2-NEXT: ret <16 x i16> [[RES]]
+;
+; AVX512-LABEL: define <16 x i16> @src_v8tov16_i16(
+; AVX512-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX512-NEXT: ret <16 x i16> [[RES]]
;
%select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z
%select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x
@@ -108,28 +92,12 @@ define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x
}
define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
-; SSE-LABEL: define <4 x i32> @src_v2tov4_i32(
-; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
-; SSE-NEXT: ret <4 x i32> [[RES]]
-;
-; AVX2-LABEL: define <4 x i32> @src_v2tov4_i32(
-; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
-; AVX2-NEXT: ret <4 x i32> [[RES]]
-;
-; AVX512-LABEL: define <4 x i32> @src_v2tov4_i32(
-; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
-; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
-; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX512-NEXT: ret <4 x i32> [[RES]]
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
%select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
@@ -138,12 +106,28 @@ define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i3
}
define <8 x i32> @src_v4tov8_i32(<4 x i1> %a, <4 x i1> %b, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
-; CHECK-LABEL: define <8 x i32> @src_v4tov8_i32(
-; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: ret <8 x i32> [[RES]]
+; SSE-LABEL: define <8 x i32> @src_v4tov8_i32(
+; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]]
+; SSE-NEXT: ret <8 x i32> [[RES]]
+;
+; AVX2-LABEL: define <8 x i32> @src_v4tov8_i32(
+; AVX2-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]]
+; AVX2-NEXT: ret <8 x i32> [[RES]]
+;
+; AVX512-LABEL: define <8 x i32> @src_v4tov8_i32(
+; AVX512-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX512-NEXT: ret <8 x i32> [[RES]]
;
%select.xz = select <4 x i1> %a, <4 x i32> %x, <4 x i32> %z
%select.yx = select <4 x i1> %b, <4 x i32> %y, <4 x i32> %x
@@ -182,12 +166,28 @@ define <2 x i64> @src_v2tov2_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i6
}
define <4 x i64> @src_v2tov4_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
-; CHECK-LABEL: define <4 x i64> @src_v2tov4_i64(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x i64> [[RES]]
+; SSE-LABEL: define <4 x i64> @src_v2tov4_i64(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]]
+; SSE-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX2-LABEL: define <4 x i64> @src_v2tov4_i64(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]]
+; AVX2-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX512-LABEL: define <4 x i64> @src_v2tov4_i64(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x i64> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z
%select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x
@@ -226,12 +226,28 @@ define <4 x float> @src_v2tov4_float(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <
}
define <8 x float> @src_v4tov8_float(<4 x i1> %a, <4 x i1> %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
-; CHECK-LABEL: define <8 x float> @src_v4tov8_float(
-; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: ret <8 x float> [[RES]]
+; SSE-LABEL: define <8 x float> @src_v4tov8_float(
+; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[Z]], <4 x float> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP2]], <8 x float> [[TMP3]]
+; SSE-NEXT: ret <8 x float> [[RES]]
+;
+; AVX2-LABEL: define <8 x float> @src_v4tov8_float(
+; AVX2-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[Z]], <4 x float> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP2]], <8 x float> [[TMP3]]
+; AVX2-NEXT: ret <8 x float> [[RES]]
+;
+; AVX512-LABEL: define <8 x float> @src_v4tov8_float(
+; AVX512-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX512-NEXT: ret <8 x float> [[RES]]
;
%select.xz = select <4 x i1> %a, <4 x float> %x, <4 x float> %z
%select.yx = select <4 x i1> %b, <4 x float> %y, <4 x float> %x
@@ -270,12 +286,28 @@ define <2 x double> @src_v2tov2_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x
}
define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) {
-; CHECK-LABEL: define <4 x double> @src_v2tov4_double(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x double> [[RES]]
+; SSE-LABEL: define <4 x double> @src_v2tov4_double(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[TMP2]], <4 x double> [[TMP3]]
+; SSE-NEXT: ret <4 x double> [[RES]]
+;
+; AVX2-LABEL: define <4 x double> @src_v2tov4_double(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[TMP2]], <4 x double> [[TMP3]]
+; AVX2-NEXT: ret <4 x double> [[RES]]
+;
+; AVX512-LABEL: define <4 x double> @src_v2tov4_double(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x double> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z
%select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x
@@ -283,6 +315,37 @@ define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x
ret <4 x double> %res
}
+; ISSUE - https://github.com/llvm/llvm-project/issues/130250
+; There should be no issues when the mask elements are in the following range
+; DestVectorSize * 2 < MaskEls < SrcVectorSize * 2
+define <2 x float> @test_mask0(<4 x i1> %c, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; CHECK-LABEL: define <2 x float> @test_mask0(
+; CHECK-SAME: <4 x i1> [[C:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XY:%.*]] = select <4 x i1> [[C]], <4 x float> [[X]], <4 x float> [[Y]]
+; CHECK-NEXT: [[SELECT_YZ:%.*]] = select <4 x i1> [[C]], <4 x float> [[Y]], <4 x float> [[Z]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XY]], <4 x float> [[SELECT_YZ]], <2 x i32> <i32 4, i32 7>
+; CHECK-NEXT: ret <2 x float> [[RES]]
+;
+ %select.xy = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+ %select.yz = select <4 x i1> %c, <4 x float> %y, <4 x float> %z
+ %res = shufflevector <4 x float> %select.xy, <4 x float> %select.yz, <2 x i32> <i32 4, i32 7>
+ ret <2 x float> %res
+}
+
+define <2 x float> @test_mask1(<4 x i1> %c, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; CHECK-LABEL: define <2 x float> @test_mask1(
+; CHECK-SAME: <4 x i1> [[C:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XY:%.*]] = select <4 x i1> [[C]], <4 x float> [[X]], <4 x float> [[Y]]
+; CHECK-NEXT: [[SELECT_YZ:%.*]] = select <4 x i1> [[C]], <4 x float> [[Y]], <4 x float> [[Z]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XY]], <4 x float> [[SELECT_YZ]], <2 x i32> <i32 7, i32 4>
+; CHECK-NEXT: ret <2 x float> [[RES]]
+;
+ %select.xy = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+ %select.yz = select <4 x i1> %c, <4 x float> %y, <4 x float> %z
+ %res = shufflevector <4 x float> %select.xy, <4 x float> %select.yz, <2 x i32> <i32 7, i32 4>
+ ret <2 x float> %res
+}
+
; FMF Flags
define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan(
>From 602706350d7296f713ef107285734c4d0a722b96 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sat, 8 Mar 2025 00:37:44 +0900
Subject: [PATCH 2/2] Fix invalid argument in getCmpSelInstrCost
The target vector for SelectInst is SrcVecTy. It was a mistake
to specify DstVecTy as an argument before. it is now fixed.
---
.../Transforms/Vectorize/VectorCombine.cpp | 4 +-
.../PhaseOrdering/X86/blendv-select.ll | 116 +++++-------------
.../VectorCombine/X86/shuffle-of-selects.ll | 35 +++---
3 files changed, 50 insertions(+), 105 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 776a733d86afc..019d79567b4ae 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2055,8 +2055,8 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
auto SK = TargetTransformInfo::SK_PermuteTwoSrc;
auto SelOp = Instruction::Select;
InstructionCost OldCost = TTI.getCmpSelInstrCost(
- SelOp, DstVecTy, C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
- OldCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, C2VecTy,
+ SelOp, SrcVecTy, C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ OldCost += TTI.getCmpSelInstrCost(SelOp, SrcVecTy, C2VecTy,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
OldCost += TTI.getShuffleCost(SK, SrcVecTy, Mask, CostKind, 0, nullptr,
{I.getOperand(0), I.getOperand(1)}, &I);
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
index 84edc6e90a91d..444e256f9854b 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
@@ -117,11 +117,11 @@ define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64>
; SSE-LABEL: @x86_pblendvb_v8i32_v4i32(
; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32>
; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32>
-; SSE-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
-; SSE-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
-; SSE-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]]
-; SSE-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP4]] to <4 x i64>
+; SSE-NEXT: [[TMP3:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[TMP2]], <8 x i32> [[TMP1]]
+; SSE-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP3]] to <4 x i64>
; SSE-NEXT: ret <4 x i64> [[RES]]
;
; AVX2-LABEL: @x86_pblendvb_v8i32_v4i32(
@@ -168,11 +168,11 @@ define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64
; SSE-LABEL: @x86_pblendvb_v16i16_v8i16(
; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16>
; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16>
-; SSE-NEXT: [[TMP1:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
-; SSE-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
-; SSE-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]]
-; SSE-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP4]] to <4 x i64>
+; SSE-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[TMP2]], <16 x i16> [[TMP1]]
+; SSE-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP3]] to <4 x i64>
; SSE-NEXT: ret <4 x i64> [[RES]]
;
; AVX2-LABEL: @x86_pblendvb_v16i16_v8i16(
@@ -270,20 +270,10 @@ define <4 x i64> @x86_pblendvb_v32i8_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64>
;
define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
-; SSE-LABEL: @x86_pblendvb_v8f64_v4f64(
-; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
-; SSE-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
-; SSE-NEXT: ret <8 x double> [[DOTV]]
-;
-; AVX2-LABEL: @x86_pblendvb_v8f64_v4f64(
-; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
-; AVX2-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
-; AVX2-NEXT: ret <8 x double> [[DOTV]]
-;
-; AVX512-LABEL: @x86_pblendvb_v8f64_v4f64(
-; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
-; AVX512-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
-; AVX512-NEXT: ret <8 x double> [[DOTV]]
+; CHECK-LABEL: @x86_pblendvb_v8f64_v4f64(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
+; CHECK-NEXT: ret <8 x double> [[DOTV]]
;
%a.bc = bitcast <8 x double> %a to <64 x i8>
%b.bc = bitcast <8 x double> %b to <64 x i8>
@@ -304,20 +294,10 @@ define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b,
}
define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
-; SSE-LABEL: @x86_pblendvb_v16f32_v8f32(
-; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
-; SSE-NEXT: [[DOTV:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
-; SSE-NEXT: ret <16 x float> [[DOTV]]
-;
-; AVX2-LABEL: @x86_pblendvb_v16f32_v8f32(
-; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
-; AVX2-NEXT: [[DOTV:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
-; AVX2-NEXT: ret <16 x float> [[DOTV]]
-;
-; AVX512-LABEL: @x86_pblendvb_v16f32_v8f32(
-; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
-; AVX512-NEXT: [[DOTV:%.*]] = select <16 x i1> [[CMP]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
-; AVX512-NEXT: ret <16 x float> [[DOTV]]
+; CHECK-LABEL: @x86_pblendvb_v16f32_v8f32(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[DOTV:%.*]] = select <16 x i1> [[CMP]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
+; CHECK-NEXT: ret <16 x float> [[DOTV]]
;
%a.bc = bitcast <16 x float> %a to <64 x i8>
%b.bc = bitcast <16 x float> %b to <64 x i8>
@@ -338,20 +318,10 @@ define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b,
}
define <8 x i64> @x86_pblendvb_v8i64_v4i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
-; SSE-LABEL: @x86_pblendvb_v8i64_v4i64(
-; SSE-NEXT: [[TMP1:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]]
-; SSE-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]]
-; SSE-NEXT: ret <8 x i64> [[TMP2]]
-;
-; AVX2-LABEL: @x86_pblendvb_v8i64_v4i64(
-; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]]
-; AVX2-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]]
-; AVX2-NEXT: ret <8 x i64> [[TMP2]]
-;
-; AVX512-LABEL: @x86_pblendvb_v8i64_v4i64(
-; AVX512-NEXT: [[CMP:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]]
-; AVX512-NEXT: [[TMP1:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]]
-; AVX512-NEXT: ret <8 x i64> [[TMP1]]
+; CHECK-LABEL: @x86_pblendvb_v8i64_v4i64(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]]
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
;
%a.bc = bitcast <8 x i64> %a to <64 x i8>
%b.bc = bitcast <8 x i64> %b to <64 x i8>
@@ -372,35 +342,15 @@ define <8 x i64> @x86_pblendvb_v8i64_v4i64(<8 x i64> %a, <8 x i64> %b, <8 x i64>
}
define <8 x i64> @x86_pblendvb_v16i32_v8i32(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
-; SSE-LABEL: @x86_pblendvb_v16i32_v8i32(
-; SSE-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
-; SSE-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
-; SSE-NEXT: [[TMP1:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
-; SSE-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
-; SSE-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
-; SSE-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
-; SSE-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP4]] to <8 x i64>
-; SSE-NEXT: ret <8 x i64> [[RES]]
-;
-; AVX2-LABEL: @x86_pblendvb_v16i32_v8i32(
-; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
-; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
-; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
-; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
-; AVX2-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
-; AVX2-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
-; AVX2-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP4]] to <8 x i64>
-; AVX2-NEXT: ret <8 x i64> [[RES]]
-;
-; AVX512-LABEL: @x86_pblendvb_v16i32_v8i32(
-; AVX512-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
-; AVX512-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
-; AVX512-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
-; AVX512-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
-; AVX512-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
-; AVX512-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[TMP2]], <16 x i32> [[TMP1]]
-; AVX512-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP3]] to <8 x i64>
-; AVX512-NEXT: ret <8 x i64> [[RES]]
+; CHECK-LABEL: @x86_pblendvb_v16i32_v8i32(
+; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
+; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[TMP2]], <16 x i32> [[TMP1]]
+; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: ret <8 x i64> [[RES]]
;
%a.bc = bitcast <8 x i64> %a to <64 x i8>
%b.bc = bitcast <8 x i64> %b to <64 x i8>
@@ -436,11 +386,11 @@ define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i6
; AVX2-LABEL: @x86_pblendvb_v32i16_v16i16(
; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16>
; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16>
-; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
+; AVX2-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
+; AVX2-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
-; AVX2-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
-; AVX2-NEXT: [[TMP4:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[TMP2]], <32 x i16> [[TMP3]]
-; AVX2-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP4]] to <8 x i64>
+; AVX2-NEXT: [[TMP3:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[TMP2]], <32 x i16> [[TMP1]]
+; AVX2-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP3]] to <8 x i64>
; AVX2-NEXT: ret <8 x i64> [[RES]]
;
; AVX512-LABEL: @x86_pblendvb_v32i16_v16i16(
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
index 0bc87a9b03821..2588f9116f322 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
@@ -64,10 +64,9 @@ define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i1
define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
; SSE-LABEL: define <16 x i16> @src_v8tov16_i16(
; SSE-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE-NEXT: [[RES:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]]
+; SSE-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
+; SSE-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
+; SSE-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <16 x i16> [[RES]]
;
; AVX2-LABEL: define <16 x i16> @src_v8tov16_i16(
@@ -108,10 +107,9 @@ define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i3
define <8 x i32> @src_v4tov8_i32(<4 x i1> %a, <4 x i1> %b, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; SSE-LABEL: define <8 x i32> @src_v4tov8_i32(
; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]]
+; SSE-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]]
+; SSE-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]]
+; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x i32> [[RES]]
;
; AVX2-LABEL: define <8 x i32> @src_v4tov8_i32(
@@ -168,10 +166,9 @@ define <2 x i64> @src_v2tov2_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i6
define <4 x i64> @src_v2tov4_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
; SSE-LABEL: define <4 x i64> @src_v2tov4_i64(
; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]]
+; SSE-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
+; SSE-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
+; SSE-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x i64> [[RES]]
;
; AVX2-LABEL: define <4 x i64> @src_v2tov4_i64(
@@ -228,10 +225,9 @@ define <4 x float> @src_v2tov4_float(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <
define <8 x float> @src_v4tov8_float(<4 x i1> %a, <4 x i1> %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: define <8 x float> @src_v4tov8_float(
; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[Z]], <4 x float> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP2]], <8 x float> [[TMP3]]
+; SSE-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]]
+; SSE-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]]
+; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x float> [[RES]]
;
; AVX2-LABEL: define <8 x float> @src_v4tov8_float(
@@ -288,10 +284,9 @@ define <2 x double> @src_v2tov2_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x
define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) {
; SSE-LABEL: define <4 x double> @src_v2tov4_double(
; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[TMP2]], <4 x double> [[TMP3]]
+; SSE-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
+; SSE-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
+; SSE-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x double> [[RES]]
;
; AVX2-LABEL: define <4 x double> @src_v2tov4_double(
More information about the llvm-commits
mailing list