[llvm] 7397dcb - [TTI] Add basic SK_InsertSubvector shuffle mask recognition
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 2 03:25:23 PDT 2021
Author: Simon Pilgrim
Date: 2021-08-02T11:23:44+01:00
New Revision: 7397dcb403c2214d76444b3f3b16bb4eeed42e58
URL: https://github.com/llvm/llvm-project/commit/7397dcb403c2214d76444b3f3b16bb4eeed42e58
DIFF: https://github.com/llvm/llvm-project/commit/7397dcb403c2214d76444b3f3b16bb4eeed42e58.diff
LOG: [TTI] Add basic SK_InsertSubvector shuffle mask recognition
This patch adds an initial ShuffleVectorInst::isInsertSubvectorMask helper to recognize 2-op shuffles where the lowest elements of one of the sources are being inserted into the "in-place" other operand, this includes "concat_vectors" patterns as can be seen in the Arm shuffle cost changes. This also helped fix a x86 issue with irregular/length-changing SK_InsertSubvector costs - I'm hoping this will help with D107188
This doesn't currently attempt to work with 1-op shuffles that could either be a "widening" shuffle or a self-insertion.
The self-insertion case is tricky, but we currently always match this with the existing SK_PermuteSingleSrc logic.
The widening case will be addressed in a follow up patch that treats the cost as 0.
Masks with a high number of undef elts will still struggle to match optimal subvector widths - its currently bounded by minimum-width possible insertion, whilst some cases would benefit from wider (pow2?) subvectors.
Differential Revision: https://reviews.llvm.org/D107228
Added:
Modified:
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/IR/Instructions.h
llvm/lib/IR/Instructions.cpp
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/ARM/shuffle.ll
llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index d66fc1d54c5ea..13836e723583a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1074,7 +1074,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
auto *VecTy = cast<VectorType>(U->getType());
auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
- int SubIndex;
+ int NumSubElts, SubIndex;
if (Shuffle->changesLength()) {
if (Shuffle->isExtractSubvectorMask(SubIndex))
@@ -1082,7 +1082,12 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
Shuffle->getShuffleMask(), SubIndex,
VecTy);
- // TODO: Identify and add costs for insert subvector, etc.
+ if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
+ return TargetTTI->getShuffleCost(
+ TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
+ SubIndex,
+ FixedVectorType::get(VecTy->getScalarType(), NumSubElts));
+
return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
}
@@ -1109,7 +1114,11 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
- // TODO: Identify and add costs for insert subvector, etc.
+ if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
+ return TargetTTI->getShuffleCost(
+ TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex,
+ FixedVectorType::get(VecTy->getScalarType(), NumSubElts));
+
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
}
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 0c43a56daa336..4d3f512b03ed6 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -2306,6 +2306,36 @@ class ShuffleVectorInst : public Instruction {
return isExtractSubvectorMask(ShuffleMask, NumSrcElts, Index);
}
+ /// Return true if this shuffle mask is an insert subvector mask.
+ /// A valid insert subvector mask inserts the lowest elements of a second
+ /// source operand into an in-place first source operand operand.
+ /// Both the sub vector width and the insertion index is returned.
+ static bool isInsertSubvectorMask(ArrayRef<int> Mask, int NumSrcElts,
+ int &NumSubElts, int &Index);
+ static bool isInsertSubvectorMask(const Constant *Mask, int NumSrcElts,
+ int &NumSubElts, int &Index) {
+ assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(Mask->getType()))
+ return false;
+ SmallVector<int, 16> MaskAsInts;
+ getShuffleMask(Mask, MaskAsInts);
+ return isInsertSubvectorMask(MaskAsInts, NumSrcElts, NumSubElts, Index);
+ }
+
+ /// Return true if this shuffle mask is an insert subvector mask.
+ bool isInsertSubvectorMask(int &NumSubElts, int &Index) const {
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(getType()))
+ return false;
+
+ int NumSrcElts =
+ cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
+ return isInsertSubvectorMask(ShuffleMask, NumSrcElts, NumSubElts, Index);
+ }
+
/// Change values in a shuffle permute mask assuming the two vector operands
/// of length InVecNumElts have swapped position.
static void commuteShuffleMask(MutableArrayRef<int> Mask,
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 5b01c70dec8d8..053737fe3671e 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -2259,6 +2259,80 @@ bool ShuffleVectorInst::isExtractSubvectorMask(ArrayRef<int> Mask,
return false;
}
+bool ShuffleVectorInst::isInsertSubvectorMask(ArrayRef<int> Mask,
+ int NumSrcElts, int &NumSubElts,
+ int &Index) {
+ int NumMaskElts = Mask.size();
+
+ // Don't try to match if we're shuffling to a smaller size.
+ if (NumMaskElts < NumSrcElts)
+ return false;
+
+ // TODO: We don't recognize self-insertion/widening.
+ if (isSingleSourceMaskImpl(Mask, NumSrcElts))
+ return false;
+
+ // Determine which mask elements are attributed to which source.
+ APInt UndefElts = APInt::getNullValue(NumMaskElts);
+ APInt Src0Elts = APInt::getNullValue(NumMaskElts);
+ APInt Src1Elts = APInt::getNullValue(NumMaskElts);
+ bool Src0Identity = true;
+ bool Src1Identity = true;
+
+ for (int i = 0; i != NumMaskElts; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ UndefElts.setBit(i);
+ continue;
+ }
+ if (M < NumSrcElts) {
+ Src0Elts.setBit(i);
+ Src0Identity &= (M == i);
+ continue;
+ }
+ Src1Elts.setBit(i);
+ Src1Identity &= (M == (i + NumSrcElts));
+ continue;
+ }
+ assert((Src0Elts | Src1Elts | UndefElts).isAllOnesValue() &&
+ "unknown shuffle elements");
+ assert(!Src0Elts.isNullValue() && !Src1Elts.isNullValue() &&
+ "2-source shuffle not found");
+
+ // Determine lo/hi span ranges.
+ // TODO: How should we handle undefs at the start of subvector insertions?
+ int Src0Lo = Src0Elts.countTrailingZeros();
+ int Src1Lo = Src1Elts.countTrailingZeros();
+ int Src0Hi = NumMaskElts - Src0Elts.countLeadingZeros();
+ int Src1Hi = NumMaskElts - Src1Elts.countLeadingZeros();
+
+ // If src0 is in place, see if the src1 elements is inplace within its own
+ // span.
+ if (Src0Identity) {
+ int NumSub1Elts = Src1Hi - Src1Lo;
+ ArrayRef<int> Sub1Mask = Mask.slice(Src1Lo, NumSub1Elts);
+ if (isIdentityMaskImpl(Sub1Mask, NumSrcElts)) {
+ NumSubElts = NumSub1Elts;
+ Index = Src1Lo;
+ return true;
+ }
+ }
+
+ // If src1 is in place, see if the src0 elements is inplace within its own
+ // span.
+ if (Src1Identity) {
+ int NumSub0Elts = Src0Hi - Src0Lo;
+ ArrayRef<int> Sub0Mask = Mask.slice(Src0Lo, NumSub0Elts);
+ if (isIdentityMaskImpl(Sub0Mask, NumSrcElts)) {
+ NumSubElts = NumSub0Elts;
+ Index = Src0Lo;
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool ShuffleVectorInst::isIdentityWithPadding() const {
if (isa<UndefValue>(Op<2>()))
return false;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 971c430d73b15..1cdd405012532 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1121,6 +1121,9 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
return SubLT.first;
}
+
+ // If the insertion isn't aligned, treat it like a 2-op shuffle.
+ Kind = TTI::SK_PermuteTwoSrc;
}
// Handle some common (illegal) sub-vector types as they are often very cheap
diff --git a/llvm/test/Analysis/CostModel/ARM/shuffle.ll b/llvm/test/Analysis/CostModel/ARM/shuffle.ll
index 0cefb4a13c8d3..642f68fd2b98b 100644
--- a/llvm/test/Analysis/CostModel/ARM/shuffle.ll
+++ b/llvm/test/Analysis/CostModel/ARM/shuffle.ll
@@ -171,39 +171,39 @@ define void @reverse() {
define void @concat() {
; CHECK-MVE-LABEL: 'concat'
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-NEON-LABEL: 'concat'
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll
index f2426925009f9..280cb4c7ee8f0 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll
@@ -22,13 +22,13 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
-; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX-LABEL: 'test_vXf64'
@@ -36,13 +36,13 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test_vXf64'
@@ -80,13 +80,13 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
-; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX-LABEL: 'test_vXi64'
@@ -94,13 +94,13 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test_vXi64'
@@ -144,24 +144,24 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'test_vXf32'
@@ -175,24 +175,24 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test_vXf32'
@@ -206,24 +206,24 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'test_vXf32'
@@ -237,24 +237,24 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'test_vXf32'
@@ -268,24 +268,24 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test_vXf32'
@@ -365,24 +365,24 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'test_vXi32'
@@ -396,24 +396,24 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test_vXi32'
@@ -427,24 +427,24 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'test_vXi32'
@@ -458,24 +458,24 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'test_vXi32'
@@ -489,24 +489,24 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test_vXi32'
@@ -598,16 +598,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'test_vXi16'
@@ -633,16 +633,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test_vXi16'
@@ -668,16 +668,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'test_vXi16'
@@ -703,16 +703,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'test_vXi16'
@@ -738,16 +738,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512F-LABEL: 'test_vXi16'
@@ -773,16 +773,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512BW-LABEL: 'test_vXi16'
@@ -808,16 +808,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512VMBI-LABEL: 'test_vXi16'
@@ -843,16 +843,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
More information about the llvm-commits
mailing list