[llvm] 89ca3e7 - [CostModel][X86] Reduce worst case v8i16/v16i8 SSE2 shuffle costs (#124789)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 02:23:13 PST 2025
Author: Simon Pilgrim
Date: 2025-01-29T10:23:09Z
New Revision: 89ca3e72ca03efbbfb5ae9b1c71d81f2d1753521
URL: https://github.com/llvm/llvm-project/commit/89ca3e72ca03efbbfb5ae9b1c71d81f2d1753521
DIFF: https://github.com/llvm/llvm-project/commit/89ca3e72ca03efbbfb5ae9b1c71d81f2d1753521.diff
LOG: [CostModel][X86] Reduce worst case v8i16/v16i8 SSE2 shuffle costs (#124789)
These were based off instruction count, not throughput - we can probably improve these further, but these throughput numbers match the worse expanded shuffles we see in the vector-shuffle-128-v* codegen tests.
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/reduce-xor.ll
llvm/test/Analysis/CostModel/X86/reduction.ll
llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll
llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll
llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll
llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll
llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
llvm/test/Analysis/CostModel/X86/vector-insert-value.ll
llvm/test/Analysis/CostModel/X86/vector-insert.ll
llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll
llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 82523bb6557adc7..f6552c500d7cfbf 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2204,19 +2204,19 @@ InstructionCost X86TTIImpl::getShuffleCost(
{TTI::SK_PermuteSingleSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
{TTI::SK_PermuteSingleSrc, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
{TTI::SK_PermuteSingleSrc, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
- {TTI::SK_PermuteSingleSrc, MVT::v8i16, {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, {3, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
// + pshufd/unpck
- {TTI::SK_PermuteSingleSrc, MVT::v8f16, {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
+ {TTI::SK_PermuteSingleSrc, MVT::v8f16, {3, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw
// + pshufd/unpck
- {TTI::SK_PermuteSingleSrc, MVT::v16i8, {10, 10, 10, 10}}, // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + 2*packus
+ {TTI::SK_PermuteSingleSrc, MVT::v16i8, {8, 10, 10, 10}}, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + 2*packus
{TTI::SK_PermuteTwoSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
{TTI::SK_PermuteTwoSrc, MVT::v2i64, {1, 1, 1, 1}}, // shufpd
{TTI::SK_PermuteTwoSrc, MVT::v4i32, {2, 2, 2, 2}}, // 2*{unpck,movsd,pshufd}
- {TTI::SK_PermuteTwoSrc, MVT::v8i16, {8, 8, 8, 8}}, // blend+permute
- {TTI::SK_PermuteTwoSrc, MVT::v8f16, {8, 8, 8, 8}}, // blend+permute
- {TTI::SK_PermuteTwoSrc, MVT::v16i8, {13, 13, 13, 13}}, // blend+permute
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, {6, 8, 8, 8}}, // blend+permute
+ {TTI::SK_PermuteTwoSrc, MVT::v8f16, {6, 8, 8, 8}}, // blend+permute
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, {11, 13, 13, 13}}, // blend+permute
};
static const CostTblEntry SSE3BroadcastLoadTbl[] = {
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll
index f1c8bcda65bd134..757db4a5a41beca 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll
@@ -157,11 +157,11 @@ define i32 @reduce_i1(i32 %arg) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i1'
diff --git a/llvm/test/Analysis/CostModel/X86/reduction.ll b/llvm/test/Analysis/CostModel/X86/reduction.ll
index 5ff3920c638749d..4ad0887a27884dc 100644
--- a/llvm/test/Analysis/CostModel/X86/reduction.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduction.ll
@@ -634,9 +634,9 @@ define fastcc i64 @no_pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) {
define fastcc i16 @no_pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) {
; SSE2-LABEL: 'no_pairwise_reduction8i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7
@@ -1126,11 +1126,11 @@ define fastcc i64 @pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) {
define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) {
; SSE2-LABEL: 'pairwise_reduction8i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
index e91db1b0cb14a37..f0536f96941ac51 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
@@ -251,7 +251,7 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256,
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
@@ -275,7 +275,7 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256,
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 28, i32 29>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 30, i32 31>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
@@ -899,9 +899,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 12, i32 13>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -923,9 +923,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 28, i32 29>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 30, i32 31>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll
index bc3af51914499af..a9443cfe0f97bdf 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll
@@ -527,23 +527,23 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
@@ -858,19 +858,19 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'test_vXi8'
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll
index ed3ddc060c16734..595826cd6e8d5c3 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll
@@ -193,10 +193,10 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE2-LABEL: 'test_vXi16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'test_vXi16'
@@ -285,9 +285,9 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'test_vXi8'
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll
index 5c50ec1feb05822..8663659d89cb330 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll
@@ -128,9 +128,9 @@ define void @test_vXi16(<2 x i16> %a32, <2 x i16> %b32, <4 x i16> %a64, <4 x i16
; SSE2-LABEL: 'test_vXi16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> <i32 0, i32 2>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> <i32 0, i32 32, i32 2, i32 34, i32 4, i32 36, i32 6, i32 38, i32 8, i32 40, i32 10, i32 42, i32 12, i32 44, i32 14, i32 46, i32 16, i32 48, i32 18, i32 50, i32 20, i32 52, i32 22, i32 54, i32 24, i32 56, i32 26, i32 58, i32 28, i32 60, i32 30, i32 62>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> <i32 0, i32 32, i32 2, i32 34, i32 4, i32 36, i32 6, i32 38, i32 8, i32 40, i32 10, i32 42, i32 12, i32 44, i32 14, i32 46, i32 16, i32 48, i32 18, i32 50, i32 20, i32 52, i32 22, i32 54, i32 24, i32 56, i32 26, i32 58, i32 28, i32 60, i32 30, i32 62>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'test_vXi16'
@@ -202,9 +202,9 @@ define void @test_vXi8(<2 x i8> %a16, <2 x i8> %b16, <4 x i8> %a32, <4 x i8> %b3
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> <i32 0, i32 2>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> <i32 0, i32 32, i32 2, i32 34, i32 4, i32 36, i32 6, i32 38, i32 8, i32 40, i32 10, i32 42, i32 12, i32 44, i32 14, i32 46, i32 16, i32 48, i32 18, i32 50, i32 20, i32 52, i32 22, i32 54, i32 24, i32 56, i32 26, i32 58, i32 28, i32 60, i32 30, i32 62>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> <i32 0, i32 64, i32 2, i32 66, i32 4, i32 68, i32 6, i32 70, i32 8, i32 72, i32 10, i32 74, i32 12, i32 76, i32 14, i32 78, i32 16, i32 80, i32 18, i32 82, i32 20, i32 84, i32 22, i32 86, i32 24, i32 88, i32 26, i32 90, i32 28, i32 92, i32 30, i32 94, i32 32, i32 96, i32 34, i32 98, i32 36, i32 100, i32 38, i32 102, i32 40, i32 104, i32 42, i32 106, i32 44, i32 108, i32 46, i32 110, i32 48, i32 112, i32 50, i32 114, i32 52, i32 116, i32 54, i32 118, i32 56, i32 120, i32 58, i32 122, i32 60, i32 124, i32 62, i32 126>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> <i32 0, i32 32, i32 2, i32 34, i32 4, i32 36, i32 6, i32 38, i32 8, i32 40, i32 10, i32 42, i32 12, i32 44, i32 14, i32 46, i32 16, i32 48, i32 18, i32 50, i32 20, i32 52, i32 22, i32 54, i32 24, i32 56, i32 26, i32 58, i32 28, i32 60, i32 30, i32 62>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> <i32 0, i32 64, i32 2, i32 66, i32 4, i32 68, i32 6, i32 70, i32 8, i32 72, i32 10, i32 74, i32 12, i32 76, i32 14, i32 78, i32 16, i32 80, i32 18, i32 82, i32 20, i32 84, i32 22, i32 86, i32 24, i32 88, i32 26, i32 90, i32 28, i32 92, i32 30, i32 94, i32 32, i32 96, i32 34, i32 98, i32 36, i32 100, i32 38, i32 102, i32 40, i32 104, i32 42, i32 106, i32 44, i32 108, i32 46, i32 110, i32 48, i32 112, i32 50, i32 114, i32 52, i32 116, i32 54, i32 118, i32 56, i32 120, i32 58, i32 122, i32 60, i32 124, i32 62, i32 126>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'test_vXi8'
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll
index efa0f2eb8dc9411..67c753b77f13436 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll
@@ -237,10 +237,10 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE2-LABEL: 'test_vXi16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'test_vXi16'
@@ -329,9 +329,9 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> <i32 3, i32 0>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'test_vXi8'
diff --git a/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
index 9caaae0717fda8c..ed034d7bccc94d7 100644
--- a/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
@@ -674,25 +674,25 @@ define i32 @insert_i8(i32 %arg) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE3-LABEL: 'insert_i8'
@@ -707,25 +707,25 @@ define i32 @insert_i8(i32 %arg) {
; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7
; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63
; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'insert_i8'
@@ -978,25 +978,25 @@ define i32 @insert_i1(i32 %arg) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE3-LABEL: 'insert_i1'
@@ -1011,25 +1011,25 @@ define i32 @insert_i1(i32 %arg) {
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63
; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'insert_i1'
diff --git a/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll b/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll
index ee82e10f9ebb6d4..aff1de0bfde613b 100644
--- a/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll
+++ b/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll
@@ -673,26 +673,26 @@ define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> %src64, i8 %val, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> %src64, i8 %val, i32 7
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> %src128, i8 %val, i32 %arg
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> %src128, i8 %val, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> %src128, i8 %val, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> %src128, i8 %val, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_0 = insertelement <16 x i8> %src128, i8 %val, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_8 = insertelement <16 x i8> %src128, i8 %val, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_15 = insertelement <16 x i8> %src128, i8 %val, i32 15
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> %src256, i8 %val, i32 %arg
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> %src256, i8 %val, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> %src256, i8 %val, i32 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> %src256, i8 %val, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> %src256, i8 %val, i32 15
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> %src256, i8 %val, i32 24
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> %src256, i8 %val, i32 31
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_0 = insertelement <32 x i8> %src256, i8 %val, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_7 = insertelement <32 x i8> %src256, i8 %val, i32 7
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_8 = insertelement <32 x i8> %src256, i8 %val, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_15 = insertelement <32 x i8> %src256, i8 %val, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_24 = insertelement <32 x i8> %src256, i8 %val, i32 24
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_31 = insertelement <32 x i8> %src256, i8 %val, i32 31
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> %src512, i8 %val, i32 %arg
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> %src512, i8 %val, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> %src512, i8 %val, i32 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> %src512, i8 %val, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> %src512, i8 %val, i32 15
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> %src512, i8 %val, i32 24
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> %src512, i8 %val, i32 31
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_32 = insertelement <64 x i8> %src512, i8 %val, i32 32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> %src512, i8 %val, i32 48
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> %src512, i8 %val, i32 63
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_0 = insertelement <64 x i8> %src512, i8 %val, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_7 = insertelement <64 x i8> %src512, i8 %val, i32 7
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_8 = insertelement <64 x i8> %src512, i8 %val, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_15 = insertelement <64 x i8> %src512, i8 %val, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_24 = insertelement <64 x i8> %src512, i8 %val, i32 24
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_31 = insertelement <64 x i8> %src512, i8 %val, i32 31
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_32 = insertelement <64 x i8> %src512, i8 %val, i32 32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_48 = insertelement <64 x i8> %src512, i8 %val, i32 48
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_63 = insertelement <64 x i8> %src512, i8 %val, i32 63
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE3-LABEL: 'insert_i8'
@@ -706,26 +706,26 @@ define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x
; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> %src64, i8 %val, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> %src64, i8 %val, i32 7
; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> %src128, i8 %val, i32 %arg
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> %src128, i8 %val, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> %src128, i8 %val, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> %src128, i8 %val, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_0 = insertelement <16 x i8> %src128, i8 %val, i32 0
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_8 = insertelement <16 x i8> %src128, i8 %val, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_15 = insertelement <16 x i8> %src128, i8 %val, i32 15
; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> %src256, i8 %val, i32 %arg
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> %src256, i8 %val, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> %src256, i8 %val, i32 7
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> %src256, i8 %val, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> %src256, i8 %val, i32 15
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> %src256, i8 %val, i32 24
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> %src256, i8 %val, i32 31
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_0 = insertelement <32 x i8> %src256, i8 %val, i32 0
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_7 = insertelement <32 x i8> %src256, i8 %val, i32 7
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_8 = insertelement <32 x i8> %src256, i8 %val, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_15 = insertelement <32 x i8> %src256, i8 %val, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_24 = insertelement <32 x i8> %src256, i8 %val, i32 24
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_31 = insertelement <32 x i8> %src256, i8 %val, i32 31
; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> %src512, i8 %val, i32 %arg
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> %src512, i8 %val, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> %src512, i8 %val, i32 7
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> %src512, i8 %val, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> %src512, i8 %val, i32 15
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> %src512, i8 %val, i32 24
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> %src512, i8 %val, i32 31
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_32 = insertelement <64 x i8> %src512, i8 %val, i32 32
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> %src512, i8 %val, i32 48
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> %src512, i8 %val, i32 63
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_0 = insertelement <64 x i8> %src512, i8 %val, i32 0
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_7 = insertelement <64 x i8> %src512, i8 %val, i32 7
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_8 = insertelement <64 x i8> %src512, i8 %val, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_15 = insertelement <64 x i8> %src512, i8 %val, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_24 = insertelement <64 x i8> %src512, i8 %val, i32 24
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_31 = insertelement <64 x i8> %src512, i8 %val, i32 31
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_32 = insertelement <64 x i8> %src512, i8 %val, i32 32
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_48 = insertelement <64 x i8> %src512, i8 %val, i32 48
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_63 = insertelement <64 x i8> %src512, i8 %val, i32 63
; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'insert_i8'
@@ -977,26 +977,26 @@ define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> %src32, i1 %val, i32 %arg
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> %src32, i1 %val, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> %src32, i1 %val, i32 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> %src32, i1 %val, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> %src32, i1 %val, i32 15
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> %src32, i1 %val, i32 24
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> %src32, i1 %val, i32 31
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_0 = insertelement <32 x i1> %src32, i1 %val, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_7 = insertelement <32 x i1> %src32, i1 %val, i32 7
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_8 = insertelement <32 x i1> %src32, i1 %val, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_15 = insertelement <32 x i1> %src32, i1 %val, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_24 = insertelement <32 x i1> %src32, i1 %val, i32 24
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_31 = insertelement <32 x i1> %src32, i1 %val, i32 31
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> %src64, i1 %val, i32 %arg
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> %src64, i1 %val, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> %src64, i1 %val, i32 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> %src64, i1 %val, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> %src64, i1 %val, i32 15
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> %src64, i1 %val, i32 24
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> %src64, i1 %val, i32 31
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> %src64, i1 %val, i32 32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_48 = insertelement <64 x i1> %src64, i1 %val, i32 48
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> %src64, i1 %val, i32 63
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_0 = insertelement <64 x i1> %src64, i1 %val, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_7 = insertelement <64 x i1> %src64, i1 %val, i32 7
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_8 = insertelement <64 x i1> %src64, i1 %val, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_15 = insertelement <64 x i1> %src64, i1 %val, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_24 = insertelement <64 x i1> %src64, i1 %val, i32 24
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_31 = insertelement <64 x i1> %src64, i1 %val, i32 31
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_32 = insertelement <64 x i1> %src64, i1 %val, i32 32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_48 = insertelement <64 x i1> %src64, i1 %val, i32 48
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_63 = insertelement <64 x i1> %src64, i1 %val, i32 63
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE3-LABEL: 'insert_i1'
@@ -1010,26 +1010,26 @@ define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4
; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> %src32, i1 %val, i32 %arg
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> %src32, i1 %val, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> %src32, i1 %val, i32 7
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> %src32, i1 %val, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> %src32, i1 %val, i32 15
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> %src32, i1 %val, i32 24
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> %src32, i1 %val, i32 31
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_0 = insertelement <32 x i1> %src32, i1 %val, i32 0
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_7 = insertelement <32 x i1> %src32, i1 %val, i32 7
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_8 = insertelement <32 x i1> %src32, i1 %val, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_15 = insertelement <32 x i1> %src32, i1 %val, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_24 = insertelement <32 x i1> %src32, i1 %val, i32 24
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_31 = insertelement <32 x i1> %src32, i1 %val, i32 31
; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> %src64, i1 %val, i32 %arg
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> %src64, i1 %val, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> %src64, i1 %val, i32 7
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> %src64, i1 %val, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> %src64, i1 %val, i32 15
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> %src64, i1 %val, i32 24
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> %src64, i1 %val, i32 31
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> %src64, i1 %val, i32 32
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_48 = insertelement <64 x i1> %src64, i1 %val, i32 48
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> %src64, i1 %val, i32 63
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_0 = insertelement <64 x i1> %src64, i1 %val, i32 0
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_7 = insertelement <64 x i1> %src64, i1 %val, i32 7
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_8 = insertelement <64 x i1> %src64, i1 %val, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_15 = insertelement <64 x i1> %src64, i1 %val, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_24 = insertelement <64 x i1> %src64, i1 %val, i32 24
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_31 = insertelement <64 x i1> %src64, i1 %val, i32 31
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_32 = insertelement <64 x i1> %src64, i1 %val, i32 32
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_48 = insertelement <64 x i1> %src64, i1 %val, i32 48
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_63 = insertelement <64 x i1> %src64, i1 %val, i32 63
; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'insert_i1'
diff --git a/llvm/test/Analysis/CostModel/X86/vector-insert.ll b/llvm/test/Analysis/CostModel/X86/vector-insert.ll
index 1d2e1f23b9b537c..103b7be692025bc 100644
--- a/llvm/test/Analysis/CostModel/X86/vector-insert.ll
+++ b/llvm/test/Analysis/CostModel/X86/vector-insert.ll
@@ -674,25 +674,25 @@ define i32 @insert_i8(i32 %arg) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE3-LABEL: 'insert_i8'
@@ -707,25 +707,25 @@ define i32 @insert_i8(i32 %arg) {
; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7
; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'insert_i8'
@@ -978,25 +978,25 @@ define i32 @insert_i1(i32 %arg) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE3-LABEL: 'insert_i1'
@@ -1011,25 +1011,25 @@ define i32 @insert_i1(i32 %arg) {
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48
-; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63
+; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63
; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'insert_i1'
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
index 056d9d1fba1415b..0c9f279c01bae04 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
@@ -59,15 +59,12 @@ define <8 x i16> @add_v8i16_01234567(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @add_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: @add_v8i16_u1234567(
-; SSE2-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 5, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: [[TMP5:%.*]] = add <8 x i16> [[A]], [[SHIFT2]]
-; SSE2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison>
+; SSE2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison>
; SSE2-NEXT: [[TMP6:%.*]] = add <8 x i16> [[A]], [[SHIFT3]]
-; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 3, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[HADD1:%.*]] = add <8 x i16> [[TMP7]], [[TMP4]]
-; SSE2-NEXT: [[HADD2:%.*]] = shufflevector <8 x i16> [[HADD1]], <8 x i16> [[TMP5]], <8 x i32> <i32 poison, i32 1, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: [[HADD3:%.*]] = shufflevector <8 x i16> [[HADD2]], <8 x i16> [[TMP6]], <8 x i32> <i32 poison, i32 1, i32 2, i32 14, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[HADD3:%.*]] = shufflevector <8 x i16> [[HADD1]], <8 x i16> [[TMP6]], <8 x i32> <i32 poison, i32 1, i32 2, i32 14, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
index 572ec9efafe1ae0..ae05f6470e56367 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
@@ -59,15 +59,12 @@ define <8 x i16> @sub_v8i16_01234567(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @sub_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: @sub_v8i16_u1234567(
-; SSE2-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 5, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: [[TMP5:%.*]] = sub <8 x i16> [[A]], [[SHIFT2]]
-; SSE2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison>
+; SSE2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison>
; SSE2-NEXT: [[TMP6:%.*]] = sub <8 x i16> [[A]], [[SHIFT3]]
-; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 3, i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[HSUB1:%.*]] = sub <8 x i16> [[TMP7]], [[TMP4]]
-; SSE2-NEXT: [[HSUB2:%.*]] = shufflevector <8 x i16> [[HSUB1]], <8 x i16> [[TMP5]], <8 x i32> <i32 poison, i32 1, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: [[HSUB3:%.*]] = shufflevector <8 x i16> [[HSUB2]], <8 x i16> [[TMP6]], <8 x i32> <i32 poison, i32 1, i32 2, i32 14, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[HSUB3:%.*]] = shufflevector <8 x i16> [[HSUB1]], <8 x i16> [[TMP6]], <8 x i32> <i32 poison, i32 1, i32 2, i32 14, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll
index 8d37665224e7ee1..f18a72b0bf77683 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll
@@ -10,24 +10,16 @@ define <2 x i64> @load_00123456(ptr nocapture noundef readonly %data) {
; SSE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[DATA:%.*]], i64 1
; SSE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i64 2
; SSE-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i64 3
-; SSE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i64 4
-; SSE-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i64 5
-; SSE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i64 6
; SSE-NEXT: [[T0:%.*]] = load i16, ptr [[DATA]], align 2
; SSE-NEXT: [[T1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
; SSE-NEXT: [[T2:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
-; SSE-NEXT: [[T3:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
-; SSE-NEXT: [[T4:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2
-; SSE-NEXT: [[T5:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2
-; SSE-NEXT: [[T6:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2
+; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[ARRAYIDX3]], align 2
; SSE-NEXT: [[VECINIT0_I_I:%.*]] = insertelement <8 x i16> undef, i16 [[T0]], i64 0
; SSE-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x i16> [[VECINIT0_I_I]], i16 [[T0]], i64 1
; SSE-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I_I]], i16 [[T1]], i64 2
; SSE-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I_I]], i16 [[T2]], i64 3
-; SSE-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I_I]], i16 [[T3]], i64 4
-; SSE-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I_I]], i16 [[T4]], i64 5
-; SSE-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I_I]], i16 [[T5]], i64 6
-; SSE-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I_I]], i16 [[T6]], i64 7
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT: [[VECINIT7_I_I:%.*]] = shufflevector <8 x i16> [[VECINIT3_I_I]], <8 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE-NEXT: [[T7:%.*]] = bitcast <8 x i16> [[VECINIT7_I_I]] to <2 x i64>
; SSE-NEXT: ret <2 x i64> [[T7]]
;
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
index 2b0855dda6dc262..6907e121583375c 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
@@ -85,20 +85,12 @@ define i1 @icmp_samesign_xor_v4i32(<4 x i32> %a) {
; add is not canonical (should be xor), but that is ok.
define i1 @icmp_add_v8i32(<8 x i32> %a) {
-; SSE-LABEL: @icmp_add_v8i32(
-; SSE-NEXT: [[E1:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 7
-; SSE-NEXT: [[E2:%.*]] = extractelement <8 x i32> [[A]], i32 2
-; SSE-NEXT: [[CMP1:%.*]] = icmp eq i32 [[E1]], 42
-; SSE-NEXT: [[CMP2:%.*]] = icmp eq i32 [[E2]], -8
-; SSE-NEXT: [[R:%.*]] = add i1 [[CMP1]], [[CMP2]]
-; SSE-NEXT: ret i1 [[R]]
-;
-; AVX-LABEL: @icmp_add_v8i32(
-; AVX-NEXT: [[TMP1:%.*]] = icmp eq <8 x i32> [[A:%.*]], <i32 poison, i32 poison, i32 -8, i32 poison, i32 poison, i32 poison, i32 poison, i32 42>
-; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <8 x i32> <i32 poison, i32 poison, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP2:%.*]] = add <8 x i1> [[SHIFT]], [[TMP1]]
-; AVX-NEXT: [[R:%.*]] = extractelement <8 x i1> [[TMP2]], i64 2
-; AVX-NEXT: ret i1 [[R]]
+; CHECK-LABEL: @icmp_add_v8i32(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i32> [[A:%.*]], <i32 poison, i32 poison, i32 -8, i32 poison, i32 poison, i32 poison, i32 poison, i32 42>
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <8 x i32> <i32 poison, i32 poison, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i1> [[SHIFT]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = extractelement <8 x i1> [[TMP2]], i64 2
+; CHECK-NEXT: ret i1 [[R]]
;
%e1 = extractelement <8 x i32> %a, i32 7
%e2 = extractelement <8 x i32> %a, i32 2
More information about the llvm-commits
mailing list