[llvm] 0ee8f69 - [VectorCombine] Fix invalid shuffle cost argument of foldShuffleOfSelects (#130281)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 7 08:40:30 PST 2025
Author: hanbeom
Date: 2025-03-07T16:40:26Z
New Revision: 0ee8f699780569d7a6b94d61cd833285bb67eca1
URL: https://github.com/llvm/llvm-project/commit/0ee8f699780569d7a6b94d61cd833285bb67eca1
DIFF: https://github.com/llvm/llvm-project/commit/0ee8f699780569d7a6b94d61cd833285bb67eca1.diff
LOG: [VectorCombine] Fix invalid shuffle cost argument of foldShuffleOfSelects (#130281)
The previous code (#128032) passed the destination vector type as the
getShuffleCost argument. Because each shuffle mask element is an index
into the two source vectors taken together, mask values can range up to
twice the size of the source vector. This causes a problem if the
destination vector is smaller than the source vector and the mask
contains an index that exceeds the size of the destination vector.
Fix the problem by correcting the previous code, which was using the wrong
argument in the cost calculation.
Fixes #130250
Added:
Modified:
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4d4a1a6e04d32..019d79567b4ae 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2037,7 +2037,6 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
m_Mask(Mask))))
return false;
- auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
auto *C1VecTy = dyn_cast<FixedVectorType>(C1->getType());
auto *C2VecTy = dyn_cast<FixedVectorType>(C2->getType());
if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
@@ -2051,24 +2050,26 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
(SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
return false;
+ auto *SrcVecTy = dyn_cast<FixedVectorType>(T1->getType());
+ auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
auto SK = TargetTransformInfo::SK_PermuteTwoSrc;
auto SelOp = Instruction::Select;
InstructionCost OldCost = TTI.getCmpSelInstrCost(
- SelOp, T1->getType(), C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
- OldCost += TTI.getCmpSelInstrCost(SelOp, T2->getType(), C2VecTy,
+ SelOp, SrcVecTy, C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ OldCost += TTI.getCmpSelInstrCost(SelOp, SrcVecTy, C2VecTy,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
- OldCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr,
+ OldCost += TTI.getShuffleCost(SK, SrcVecTy, Mask, CostKind, 0, nullptr,
{I.getOperand(0), I.getOperand(1)}, &I);
- auto *C1C2VecTy = cast<FixedVectorType>(
- toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
InstructionCost NewCost =
- TTI.getShuffleCost(SK, C1C2VecTy, Mask, CostKind, 0, nullptr, {C1, C2});
+ TTI.getShuffleCost(SK, C1VecTy, Mask, CostKind, 0, nullptr, {C1, C2});
NewCost +=
- TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {T1, T2});
+ TTI.getShuffleCost(SK, SrcVecTy, Mask, CostKind, 0, nullptr, {T1, T2});
NewCost +=
- TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {F1, F2});
- NewCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, DstVecTy,
+ TTI.getShuffleCost(SK, SrcVecTy, Mask, CostKind, 0, nullptr, {F1, F2});
+ auto *C1C2ShuffledVecTy = cast<FixedVectorType>(
+ toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
+ NewCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, C1C2ShuffledVecTy,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two selects: " << I
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
index c2ed7b9c84523..444e256f9854b 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s
-; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s
-; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX2
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
;
; PR58895 - replace shuffled _mm_blendv_epi8+icmp with select+icmp
@@ -12,10 +12,20 @@
;
define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
-; CHECK-LABEL: @x86_pblendvb_v4f64_v2f64(
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
-; CHECK-NEXT: [[DOTV:%.*]] = select <4 x i1> [[CMP]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
-; CHECK-NEXT: ret <4 x double> [[DOTV]]
+; SSE-LABEL: @x86_pblendvb_v4f64_v2f64(
+; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
+; SSE-NEXT: [[DOTV:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
+; SSE-NEXT: ret <4 x double> [[DOTV]]
+;
+; AVX2-LABEL: @x86_pblendvb_v4f64_v2f64(
+; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
+; AVX2-NEXT: [[DOTV:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
+; AVX2-NEXT: ret <4 x double> [[DOTV]]
+;
+; AVX512-LABEL: @x86_pblendvb_v4f64_v2f64(
+; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
+; AVX512-NEXT: [[DOTV:%.*]] = select <4 x i1> [[CMP]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
+; AVX512-NEXT: ret <4 x double> [[DOTV]]
;
%a.bc = bitcast <4 x double> %a to <32 x i8>
%b.bc = bitcast <4 x double> %b to <32 x i8>
@@ -36,10 +46,20 @@ define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b,
}
define <8 x float> @x86_pblendvb_v8f32_v4f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
-; CHECK-LABEL: @x86_pblendvb_v8f32_v4f32(
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
-; CHECK-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
-; CHECK-NEXT: ret <8 x float> [[DOTV]]
+; SSE-LABEL: @x86_pblendvb_v8f32_v4f32(
+; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
+; SSE-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
+; SSE-NEXT: ret <8 x float> [[DOTV]]
+;
+; AVX2-LABEL: @x86_pblendvb_v8f32_v4f32(
+; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
+; AVX2-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
+; AVX2-NEXT: ret <8 x float> [[DOTV]]
+;
+; AVX512-LABEL: @x86_pblendvb_v8f32_v4f32(
+; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
+; AVX512-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
+; AVX512-NEXT: ret <8 x float> [[DOTV]]
;
%a.bc = bitcast <8 x float> %a to <32 x i8>
%b.bc = bitcast <8 x float> %b to <32 x i8>
@@ -60,10 +80,20 @@ define <8 x float> @x86_pblendvb_v8f32_v4f32(<8 x float> %a, <8 x float> %b, <8
}
define <4 x i64> @x86_pblendvb_v4i64_v2i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v4i64_v2i64(
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]]
-; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+; SSE-LABEL: @x86_pblendvb_v4i64_v2i64(
+; SSE-NEXT: [[TMP1:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]]
+; SSE-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]]
+; SSE-NEXT: ret <4 x i64> [[TMP2]]
+;
+; AVX2-LABEL: @x86_pblendvb_v4i64_v2i64(
+; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]]
+; AVX2-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]]
+; AVX2-NEXT: ret <4 x i64> [[TMP2]]
+;
+; AVX512-LABEL: @x86_pblendvb_v4i64_v2i64(
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]]
+; AVX512-NEXT: [[TMP1:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]]
+; AVX512-NEXT: ret <4 x i64> [[TMP1]]
;
%a.bc = bitcast <4 x i64> %a to <32 x i8>
%b.bc = bitcast <4 x i64> %b to <32 x i8>
@@ -84,15 +114,35 @@ define <4 x i64> @x86_pblendvb_v4i64_v2i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>
}
define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v8i32_v4i32(
-; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32>
-; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[TMP2]], <8 x i32> [[TMP1]]
-; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP3]] to <4 x i64>
-; CHECK-NEXT: ret <4 x i64> [[RES]]
+; SSE-LABEL: @x86_pblendvb_v8i32_v4i32(
+; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32>
+; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32>
+; SSE-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
+; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
+; SSE-NEXT: [[TMP3:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[TMP2]], <8 x i32> [[TMP1]]
+; SSE-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP3]] to <4 x i64>
+; SSE-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX2-LABEL: @x86_pblendvb_v8i32_v4i32(
+; AVX2-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32>
+; AVX2-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32>
+; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
+; AVX2-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
+; AVX2-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
+; AVX2-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]]
+; AVX2-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP4]] to <4 x i64>
+; AVX2-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX512-LABEL: @x86_pblendvb_v8i32_v4i32(
+; AVX512-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32>
+; AVX512-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32>
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
+; AVX512-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
+; AVX512-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
+; AVX512-NEXT: [[TMP3:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[TMP2]], <8 x i32> [[TMP1]]
+; AVX512-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP3]] to <4 x i64>
+; AVX512-NEXT: ret <4 x i64> [[RES]]
;
%a.bc = bitcast <4 x i64> %a to <32 x i8>
%b.bc = bitcast <4 x i64> %b to <32 x i8>
@@ -115,15 +165,35 @@ define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64>
}
define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v16i16_v8i16(
-; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16>
-; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
-; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[TMP2]], <16 x i16> [[TMP1]]
-; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP3]] to <4 x i64>
-; CHECK-NEXT: ret <4 x i64> [[RES]]
+; SSE-LABEL: @x86_pblendvb_v16i16_v8i16(
+; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16>
+; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16>
+; SSE-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
+; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
+; SSE-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[TMP2]], <16 x i16> [[TMP1]]
+; SSE-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP3]] to <4 x i64>
+; SSE-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX2-LABEL: @x86_pblendvb_v16i16_v8i16(
+; AVX2-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16>
+; AVX2-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16>
+; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
+; AVX2-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
+; AVX2-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
+; AVX2-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]]
+; AVX2-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP4]] to <4 x i64>
+; AVX2-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX512-LABEL: @x86_pblendvb_v16i16_v8i16(
+; AVX512-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16>
+; AVX512-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16>
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
+; AVX512-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
+; AVX512-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
+; AVX512-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[TMP2]], <16 x i16> [[TMP1]]
+; AVX512-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP3]] to <4 x i64>
+; AVX512-NEXT: ret <4 x i64> [[RES]]
;
%a.bc = bitcast <4 x i64> %a to <32 x i8>
%b.bc = bitcast <4 x i64> %b to <32 x i8>
@@ -146,15 +216,35 @@ define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64
}
define <4 x i64> @x86_pblendvb_v32i8_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v32i8_v16i8(
-; CHECK-NEXT: [[A_BC:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
-; CHECK-NEXT: [[B_BC:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>
-; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8>
-; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[B_BC]], <32 x i8> [[A_BC]]
-; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64>
-; CHECK-NEXT: ret <4 x i64> [[RES]]
+; SSE-LABEL: @x86_pblendvb_v32i8_v16i8(
+; SSE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
+; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>
+; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8>
+; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8>
+; SSE-NEXT: [[TMP3:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[TMP3]], <32 x i8> [[TMP2]], <32 x i8> [[TMP1]]
+; SSE-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64>
+; SSE-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX2-LABEL: @x86_pblendvb_v32i8_v16i8(
+; AVX2-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
+; AVX2-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>
+; AVX2-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8>
+; AVX2-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8>
+; AVX2-NEXT: [[TMP3:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
+; AVX2-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[TMP3]], <32 x i8> [[TMP2]], <32 x i8> [[TMP1]]
+; AVX2-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64>
+; AVX2-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX512-LABEL: @x86_pblendvb_v32i8_v16i8(
+; AVX512-NEXT: [[A_BC:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
+; AVX512-NEXT: [[B_BC:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>
+; AVX512-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8>
+; AVX512-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8>
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
+; AVX512-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[B_BC]], <32 x i8> [[A_BC]]
+; AVX512-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64>
+; AVX512-NEXT: ret <4 x i64> [[RES]]
;
%a.bc = bitcast <4 x i64> %a to <32 x i8>
%b.bc = bitcast <4 x i64> %b to <32 x i8>
@@ -283,15 +373,35 @@ define <8 x i64> @x86_pblendvb_v16i32_v8i32(<8 x i64> %a, <8 x i64> %b, <8 x i64
}
define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v32i16_v16i16(
-; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16>
-; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
-; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[TMP2]], <32 x i16> [[TMP1]]
-; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP3]] to <8 x i64>
-; CHECK-NEXT: ret <8 x i64> [[RES]]
+; SSE-LABEL: @x86_pblendvb_v32i16_v16i16(
+; SSE-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16>
+; SSE-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16>
+; SSE-NEXT: [[TMP1:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
+; SSE-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
+; SSE-NEXT: [[TMP4:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[TMP2]], <32 x i16> [[TMP3]]
+; SSE-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP4]] to <8 x i64>
+; SSE-NEXT: ret <8 x i64> [[RES]]
+;
+; AVX2-LABEL: @x86_pblendvb_v32i16_v16i16(
+; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16>
+; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16>
+; AVX2-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
+; AVX2-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
+; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
+; AVX2-NEXT: [[TMP3:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[TMP2]], <32 x i16> [[TMP1]]
+; AVX2-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP3]] to <8 x i64>
+; AVX2-NEXT: ret <8 x i64> [[RES]]
+;
+; AVX512-LABEL: @x86_pblendvb_v32i16_v16i16(
+; AVX512-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16>
+; AVX512-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16>
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
+; AVX512-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
+; AVX512-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
+; AVX512-NEXT: [[TMP3:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[TMP2]], <32 x i16> [[TMP1]]
+; AVX512-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP3]] to <8 x i64>
+; AVX512-NEXT: ret <8 x i64> [[RES]]
;
%a.bc = bitcast <8 x i64> %a to <64 x i8>
%b.bc = bitcast <8 x i64> %b to <64 x i8>
@@ -314,15 +424,35 @@ define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i6
}
define <8 x i64> @x86_pblendvb_v64i8_v32i8(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
-; CHECK-LABEL: @x86_pblendvb_v64i8_v32i8(
-; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8>
-; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8>
-; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8>
-; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[B_BC]], <64 x i8> [[A_BC]]
-; CHECK-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64>
-; CHECK-NEXT: ret <8 x i64> [[RES]]
+; SSE-LABEL: @x86_pblendvb_v64i8_v32i8(
+; SSE-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8>
+; SSE-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8>
+; SSE-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8>
+; SSE-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8>
+; SSE-NEXT: [[TMP3:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]]
+; SSE-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[TMP3]], <64 x i8> [[TMP2]], <64 x i8> [[TMP1]]
+; SSE-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64>
+; SSE-NEXT: ret <8 x i64> [[RES]]
+;
+; AVX2-LABEL: @x86_pblendvb_v64i8_v32i8(
+; AVX2-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8>
+; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8>
+; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8>
+; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8>
+; AVX2-NEXT: [[TMP3:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]]
+; AVX2-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[TMP3]], <64 x i8> [[TMP2]], <64 x i8> [[TMP1]]
+; AVX2-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64>
+; AVX2-NEXT: ret <8 x i64> [[RES]]
+;
+; AVX512-LABEL: @x86_pblendvb_v64i8_v32i8(
+; AVX512-NEXT: [[A_BC:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8>
+; AVX512-NEXT: [[B_BC:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8>
+; AVX512-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8>
+; AVX512-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8>
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]]
+; AVX512-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[B_BC]], <64 x i8> [[A_BC]]
+; AVX512-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64>
+; AVX512-NEXT: ret <8 x i64> [[RES]]
;
%a.bc = bitcast <8 x i64> %a to <64 x i8>
%b.bc = bitcast <8 x i64> %b to <64 x i8>
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index 09875c5e0af40..1c128c8f56a03 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -997,8 +997,10 @@ define <4 x i64> @bitcast_smax_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b) {
; CHECK-NEXT: [[A_BC0:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
; CHECK-NEXT: [[B_BC0:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[A_BC0]], [[B_BC0]]
-; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
-; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[B]], <4 x i64> [[B]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[TMP2]] to <8 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[A]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[TMP4]] to <8 x i32>
; CHECK-NEXT: [[CONCAT:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[B_BC1]], <8 x i32> [[A_BC1]]
; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[CONCAT]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[RES]]
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
index 6653bf3375423..2588f9116f322 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
@@ -4,28 +4,12 @@
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i16> %y, <2 x i16> %z) {
-; SSE-LABEL: define <4 x i16> @src_v2tov4_i16(
-; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]]
-; SSE-NEXT: ret <4 x i16> [[RES]]
-;
-; AVX2-LABEL: define <4 x i16> @src_v2tov4_i16(
-; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
-; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]]
-; AVX2-NEXT: ret <4 x i16> [[RES]]
-;
-; AVX512-LABEL: define <4 x i16> @src_v2tov4_i16(
-; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
-; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]]
-; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]]
-; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX512-NEXT: ret <4 x i16> [[RES]]
+; CHECK-LABEL: define <4 x i16> @src_v2tov4_i16(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i16> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x i16> %x, <2 x i16> %z
%select.yx = select <2 x i1> %b, <2 x i16> %y, <2 x i16> %x
@@ -34,28 +18,12 @@ define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i1
}
define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i16> %y, <4 x i16> %z) {
-; SSE-LABEL: define <8 x i16> @src_v4tov8_i16(
-; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
-; SSE-NEXT: ret <8 x i16> [[RES]]
-;
-; AVX2-LABEL: define <8 x i16> @src_v4tov8_i16(
-; AVX2-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
-; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
-; AVX2-NEXT: ret <8 x i16> [[RES]]
-;
-; AVX512-LABEL: define <8 x i16> @src_v4tov8_i16(
-; AVX512-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
-; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]]
-; AVX512-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]]
-; AVX512-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; AVX512-NEXT: ret <8 x i16> [[RES]]
+; CHECK-LABEL: define <8 x i16> @src_v4tov8_i16(
+; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x i16> [[RES]]
;
%select.xz = select <4 x i1> %a, <4 x i16> %x, <4 x i16> %z
%select.yx = select <4 x i1> %b, <4 x i16> %y, <4 x i16> %x
@@ -94,12 +62,27 @@ define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i1
}
define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
-; CHECK-LABEL: define <16 x i16> @src_v8tov16_i16(
-; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: ret <16 x i16> [[RES]]
+; SSE-LABEL: define <16 x i16> @src_v8tov16_i16(
+; SSE-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
+; SSE-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
+; SSE-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE-NEXT: ret <16 x i16> [[RES]]
+;
+; AVX2-LABEL: define <16 x i16> @src_v8tov16_i16(
+; AVX2-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: [[RES:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]]
+; AVX2-NEXT: ret <16 x i16> [[RES]]
+;
+; AVX512-LABEL: define <16 x i16> @src_v8tov16_i16(
+; AVX512-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX512-NEXT: ret <16 x i16> [[RES]]
;
%select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z
%select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x
@@ -108,28 +91,12 @@ define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x
}
define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
-; SSE-LABEL: define <4 x i32> @src_v2tov4_i32(
-; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
-; SSE-NEXT: ret <4 x i32> [[RES]]
-;
-; AVX2-LABEL: define <4 x i32> @src_v2tov4_i32(
-; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
-; AVX2-NEXT: ret <4 x i32> [[RES]]
-;
-; AVX512-LABEL: define <4 x i32> @src_v2tov4_i32(
-; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
-; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
-; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX512-NEXT: ret <4 x i32> [[RES]]
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
%select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
@@ -138,12 +105,27 @@ define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i3
}
define <8 x i32> @src_v4tov8_i32(<4 x i1> %a, <4 x i1> %b, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
-; CHECK-LABEL: define <8 x i32> @src_v4tov8_i32(
-; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: ret <8 x i32> [[RES]]
+; SSE-LABEL: define <8 x i32> @src_v4tov8_i32(
+; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]]
+; SSE-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]]
+; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: ret <8 x i32> [[RES]]
+;
+; AVX2-LABEL: define <8 x i32> @src_v4tov8_i32(
+; AVX2-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]]
+; AVX2-NEXT: ret <8 x i32> [[RES]]
+;
+; AVX512-LABEL: define <8 x i32> @src_v4tov8_i32(
+; AVX512-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX512-NEXT: ret <8 x i32> [[RES]]
;
%select.xz = select <4 x i1> %a, <4 x i32> %x, <4 x i32> %z
%select.yx = select <4 x i1> %b, <4 x i32> %y, <4 x i32> %x
@@ -182,12 +164,27 @@ define <2 x i64> @src_v2tov2_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i6
}
define <4 x i64> @src_v2tov4_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
-; CHECK-LABEL: define <4 x i64> @src_v2tov4_i64(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x i64> [[RES]]
+; SSE-LABEL: define <4 x i64> @src_v2tov4_i64(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
+; SSE-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
+; SSE-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX2-LABEL: define <4 x i64> @src_v2tov4_i64(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]]
+; AVX2-NEXT: ret <4 x i64> [[RES]]
+;
+; AVX512-LABEL: define <4 x i64> @src_v2tov4_i64(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x i64> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z
%select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x
@@ -226,12 +223,27 @@ define <4 x float> @src_v2tov4_float(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <
}
define <8 x float> @src_v4tov8_float(<4 x i1> %a, <4 x i1> %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
-; CHECK-LABEL: define <8 x float> @src_v4tov8_float(
-; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: ret <8 x float> [[RES]]
+; SSE-LABEL: define <8 x float> @src_v4tov8_float(
+; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]]
+; SSE-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]]
+; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: ret <8 x float> [[RES]]
+;
+; AVX2-LABEL: define <8 x float> @src_v4tov8_float(
+; AVX2-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[Z]], <4 x float> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP2]], <8 x float> [[TMP3]]
+; AVX2-NEXT: ret <8 x float> [[RES]]
+;
+; AVX512-LABEL: define <8 x float> @src_v4tov8_float(
+; AVX512-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX512-NEXT: ret <8 x float> [[RES]]
;
%select.xz = select <4 x i1> %a, <4 x float> %x, <4 x float> %z
%select.yx = select <4 x i1> %b, <4 x float> %y, <4 x float> %x
@@ -270,12 +282,27 @@ define <2 x double> @src_v2tov2_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x
}
define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) {
-; CHECK-LABEL: define <4 x double> @src_v2tov4_double(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x double> [[RES]]
+; SSE-LABEL: define <4 x double> @src_v2tov4_double(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
+; SSE-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
+; SSE-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: ret <4 x double> [[RES]]
+;
+; AVX2-LABEL: define <4 x double> @src_v2tov4_double(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[TMP2]], <4 x double> [[TMP3]]
+; AVX2-NEXT: ret <4 x double> [[RES]]
+;
+; AVX512-LABEL: define <4 x double> @src_v2tov4_double(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x double> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z
%select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x
@@ -283,6 +310,37 @@ define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x
ret <4 x double> %res
}
+; ISSUE - https://github.com/llvm/llvm-project/issues/130250
+; There should be no issues when the mask element indices are in the following range:
+; DestVectorSize * 2 <= MaskEls < SrcVectorSize * 2 (e.g. indices 4 and 7 when shuffling <4 x float> sources into a <2 x float> result)
+define <2 x float> @test_mask0(<4 x i1> %c, <4 x float> %x, <4 x float> %y, <4 x float> %z) { ; shuffle of two selects sharing %c; the <2 x float> result is narrower than the <4 x float> operands
+; CHECK-LABEL: define <2 x float> @test_mask0(
+; CHECK-SAME: <4 x i1> [[C:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XY:%.*]] = select <4 x i1> [[C]], <4 x float> [[X]], <4 x float> [[Y]]
+; CHECK-NEXT: [[SELECT_YZ:%.*]] = select <4 x i1> [[C]], <4 x float> [[Y]], <4 x float> [[Z]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XY]], <4 x float> [[SELECT_YZ]], <2 x i32> <i32 4, i32 7>
+; CHECK-NEXT: ret <2 x float> [[RES]]
+;
+ %select.xy = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+ %select.yz = select <4 x i1> %c, <4 x float> %y, <4 x float> %z
+ %res = shufflevector <4 x float> %select.xy, <4 x float> %select.yz, <2 x i32> <i32 4, i32 7> ; indices 4 and 7 pick lanes 0 and 3 of %select.yz; the CHECK lines expect this IR to be left as written
+ ret <2 x float> %res
+}
+
+define <2 x float> @test_mask1(<4 x i1> %c, <4 x float> %x, <4 x float> %y, <4 x float> %z) { ; same shape as @test_mask0, with the two mask indices in the opposite order
+; CHECK-LABEL: define <2 x float> @test_mask1(
+; CHECK-SAME: <4 x i1> [[C:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XY:%.*]] = select <4 x i1> [[C]], <4 x float> [[X]], <4 x float> [[Y]]
+; CHECK-NEXT: [[SELECT_YZ:%.*]] = select <4 x i1> [[C]], <4 x float> [[Y]], <4 x float> [[Z]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XY]], <4 x float> [[SELECT_YZ]], <2 x i32> <i32 7, i32 4>
+; CHECK-NEXT: ret <2 x float> [[RES]]
+;
+ %select.xy = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+ %select.yz = select <4 x i1> %c, <4 x float> %y, <4 x float> %z
+ %res = shufflevector <4 x float> %select.xy, <4 x float> %select.yz, <2 x i32> <i32 7, i32 4> ; indices 7 and 4 pick lanes 3 and 0 of %select.yz; the CHECK lines expect this IR to be left as written
+ ret <2 x float> %res
+}
+
; FMF Flags
define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan(
More information about the llvm-commits
mailing list