[llvm] 5db826e - [CostModel][X86] Handle costs for insert/extractelement with non-immediate indices via stack

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 5 05:29:54 PDT 2021


Author: Simon Pilgrim
Date: 2021-07-05T13:26:53+01:00
New Revision: 5db826e4ce63a4d45a4695e5486841d08b86cd24

URL: https://github.com/llvm/llvm-project/commit/5db826e4ce63a4d45a4695e5486841d08b86cd24
DIFF: https://github.com/llvm/llvm-project/commit/5db826e4ce63a4d45a4695e5486841d08b86cd24.diff

LOG: [CostModel][X86] Handle costs for insert/extractelement with non-immediate indices via stack

Determine the insert/extractelement costs when performing this as a sequence of aliased loads+stores via the stack.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/insert-extract-at-zero-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/insert-extract-at-zero.ll
    llvm/test/Analysis/CostModel/X86/vector-extract.ll
    llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vector-insert.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 442785a6c355..3fd19702923a 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3263,6 +3263,36 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
   Type *ScalarType = Val->getScalarType();
   int RegisterFileMoveCost = 0;
 
+  // Non-immediate extraction/insertion can be handled as a sequence of
+  // aliased loads+stores via the stack.
+  if (Index == -1U && (Opcode == Instruction::ExtractElement ||
+                       Opcode == Instruction::InsertElement)) {
+    // TODO: On some SSE41+ targets, we expand to cmp+splat+select patterns:
+    // inselt N0, N1, N2 --> select (SplatN2 == {0,1,2...}) ? SplatN1 : N0. 
+
+    // TODO: Move this to BasicTTIImpl.h? We'd need better gep + index handling.
+    assert(isa<FixedVectorType>(Val) && "Fixed vector type expected");
+    Align VecAlign = DL.getPrefTypeAlign(Val);
+    Align SclAlign = DL.getPrefTypeAlign(ScalarType);
+
+    // Extract - store vector to stack, load scalar.
+    if (Opcode == Instruction::ExtractElement) {
+      return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0,
+                             TTI::TargetCostKind::TCK_RecipThroughput) +
+             getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0,
+                             TTI::TargetCostKind::TCK_RecipThroughput);
+    }
+    // Insert - store vector to stack, store scalar, load vector.
+    if (Opcode == Instruction::InsertElement) {
+      return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0,
+                             TTI::TargetCostKind::TCK_RecipThroughput) +
+             getMemoryOpCost(Instruction::Store, ScalarType, SclAlign, 0,
+                             TTI::TargetCostKind::TCK_RecipThroughput) +
+             getMemoryOpCost(Instruction::Load, Val, VecAlign, 0,
+                             TTI::TargetCostKind::TCK_RecipThroughput);
+    }
+  }
+
   if (Index != -1U && (Opcode == Instruction::ExtractElement ||
                        Opcode == Instruction::InsertElement)) {
     // Legalize the type.

diff  --git a/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero-inseltpoison.ll
index 0d5b07c4c0f8..f843f945d866 100644
--- a/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero-inseltpoison.ll
@@ -16,7 +16,7 @@ define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) {
   ;CHECK: cost of 1 {{.*}} extract
   %E = extractelement <8 x float> undef, i32 1
 
-  ;CHECK: cost of 1 {{.*}} extract
+  ;CHECK: cost of 3 {{.*}} extract
   %F = extractelement <8 x float> undef, i32 %arg
 
   ;CHECK: cost of 0 {{.*}} insert

diff  --git a/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero.ll b/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero.ll
index 87bf7c488b91..64131592de5d 100644
--- a/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero.ll
+++ b/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero.ll
@@ -16,7 +16,7 @@ define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) {
   ;CHECK: cost of 1 {{.*}} extract
   %E = extractelement <8 x float> undef, i32 1
 
-  ;CHECK: cost of 1 {{.*}} extract
+  ;CHECK: cost of 3 {{.*}} extract
   %F = extractelement <8 x float> undef, i32 %arg
 
   ;CHECK: cost of 0 {{.*}} insert

diff  --git a/llvm/test/Analysis/CostModel/X86/vector-extract.ll b/llvm/test/Analysis/CostModel/X86/vector-extract.ll
index db6bf348d88b..d983a9cd81ae 100644
--- a/llvm/test/Analysis/CostModel/X86/vector-extract.ll
+++ b/llvm/test/Analysis/CostModel/X86/vector-extract.ll
@@ -6,8 +6,8 @@
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE4
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
 ;
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM
@@ -15,13 +15,13 @@
 
 define i32 @extract_double(i32 %arg) {
 ; SSE-LABEL: 'extract_double'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = extractelement <4 x double> undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = extractelement <4 x double> undef, i32 3
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = extractelement <8 x double> undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = extractelement <8 x double> undef, i32 3
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = extractelement <8 x double> undef, i32 4
@@ -29,13 +29,13 @@ define i32 @extract_double(i32 %arg) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'extract_double'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = extractelement <4 x double> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_3 = extractelement <4 x double> undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = extractelement <8 x double> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_3 = extractelement <8 x double> undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = extractelement <8 x double> undef, i32 4
@@ -43,13 +43,13 @@ define i32 @extract_double(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'extract_double'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = extractelement <4 x double> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_3 = extractelement <4 x double> undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = extractelement <8 x double> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_3 = extractelement <8 x double> undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_4 = extractelement <8 x double> undef, i32 4
@@ -75,18 +75,18 @@ define i32 @extract_double(i32 %arg) {
 
 define i32 @extract_float(i32 %arg) {
 ; SSE-LABEL: 'extract_float'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = extractelement <2 x float> undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = extractelement <4 x float> undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = extractelement <4 x float> undef, i32 3
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = extractelement <8 x float> undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = extractelement <8 x float> undef, i32 3
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = extractelement <8 x float> undef, i32 4
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = extractelement <8 x float> undef, i32 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = extractelement <16 x float> undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = extractelement <16 x float> undef, i32 3
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = extractelement <16 x float> undef, i32 8
@@ -94,18 +94,18 @@ define i32 @extract_float(i32 %arg) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'extract_float'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = extractelement <2 x float> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = extractelement <4 x float> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = extractelement <4 x float> undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = extractelement <8 x float> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = extractelement <8 x float> undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = extractelement <8 x float> undef, i32 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = extractelement <8 x float> undef, i32 7
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = extractelement <16 x float> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = extractelement <16 x float> undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = extractelement <16 x float> undef, i32 8
@@ -113,18 +113,18 @@ define i32 @extract_float(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'extract_float'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = extractelement <2 x float> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = extractelement <4 x float> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = extractelement <4 x float> undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = extractelement <8 x float> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = extractelement <8 x float> undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = extractelement <8 x float> undef, i32 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = extractelement <8 x float> undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = extractelement <16 x float> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = extractelement <16 x float> undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_8 = extractelement <16 x float> undef, i32 8
@@ -156,13 +156,13 @@ define i32 @extract_float(i32 %arg) {
 
 define i32 @extract_i64(i32 %arg) {
 ; SSE2-LABEL: 'extract_i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
@@ -170,13 +170,13 @@ define i32 @extract_i64(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'extract_i64'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
@@ -184,13 +184,13 @@ define i32 @extract_i64(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'extract_i64'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
@@ -198,13 +198,13 @@ define i32 @extract_i64(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'extract_i64'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
@@ -212,13 +212,13 @@ define i32 @extract_i64(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'extract_i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
@@ -226,13 +226,13 @@ define i32 @extract_i64(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'extract_i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
@@ -240,13 +240,13 @@ define i32 @extract_i64(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'extract_i64'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
@@ -254,13 +254,13 @@ define i32 @extract_i64(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'extract_i64'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
@@ -286,18 +286,18 @@ define i32 @extract_i64(i32 %arg) {
 
 define i32 @extract_i32(i32 %arg) {
 ; SSE2-LABEL: 'extract_i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
@@ -305,18 +305,18 @@ define i32 @extract_i32(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'extract_i32'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
@@ -324,18 +324,18 @@ define i32 @extract_i32(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'extract_i32'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
@@ -343,18 +343,18 @@ define i32 @extract_i32(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'extract_i32'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
@@ -362,18 +362,18 @@ define i32 @extract_i32(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'extract_i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
@@ -381,18 +381,18 @@ define i32 @extract_i32(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'extract_i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
@@ -400,18 +400,18 @@ define i32 @extract_i32(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'extract_i32'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
@@ -419,18 +419,18 @@ define i32 @extract_i32(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'extract_i32'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
@@ -462,21 +462,21 @@ define i32 @extract_i32(i32 %arg) {
 
 define i32 @extract_i16(i32 %arg) {
 ; SSE2-LABEL: 'extract_i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
@@ -487,21 +487,21 @@ define i32 @extract_i16(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'extract_i16'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
@@ -512,21 +512,21 @@ define i32 @extract_i16(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'extract_i16'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
@@ -537,21 +537,21 @@ define i32 @extract_i16(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'extract_i16'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
@@ -562,21 +562,21 @@ define i32 @extract_i16(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'extract_i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
@@ -587,21 +587,21 @@ define i32 @extract_i16(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'extract_i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
@@ -612,21 +612,21 @@ define i32 @extract_i16(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'extract_i16'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
@@ -637,21 +637,21 @@ define i32 @extract_i16(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'extract_i16'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
@@ -693,27 +693,27 @@ define i32 @extract_i16(i32 %arg) {
 
 define i32 @extract_i8(i32 %arg) {
 ; SSE2-LABEL: 'extract_i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_3 = extractelement <4 x i8> undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_7 = extractelement <8 x i8> undef, i32 7
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
@@ -726,27 +726,27 @@ define i32 @extract_i8(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'extract_i8'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_3 = extractelement <4 x i8> undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_7 = extractelement <8 x i8> undef, i32 7
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
@@ -759,27 +759,27 @@ define i32 @extract_i8(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'extract_i8'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_3 = extractelement <4 x i8> undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_7 = extractelement <8 x i8> undef, i32 7
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
@@ -792,27 +792,27 @@ define i32 @extract_i8(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'extract_i8'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = extractelement <4 x i8> undef, i32 3
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = extractelement <8 x i8> undef, i32 7
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
@@ -825,27 +825,27 @@ define i32 @extract_i8(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'extract_i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = extractelement <4 x i8> undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = extractelement <8 x i8> undef, i32 7
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
@@ -858,27 +858,27 @@ define i32 @extract_i8(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'extract_i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = extractelement <4 x i8> undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = extractelement <8 x i8> undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
@@ -891,27 +891,27 @@ define i32 @extract_i8(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'extract_i8'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8_3 = extractelement <4 x i8> undef, i32 3
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8_7 = extractelement <8 x i8> undef, i32 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
@@ -924,27 +924,27 @@ define i32 @extract_i8(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'extract_i8'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = extractelement <4 x i8> undef, i32 3
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = extractelement <8 x i8> undef, i32 7
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
@@ -997,27 +997,27 @@ define i32 @extract_i8(i32 %arg) {
 
 define i32 @extract_i1(i32 %arg) {
 ; SSE2-LABEL: 'extract_i1'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_7 = extractelement <32 x i1> undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_8 = extractelement <32 x i1> undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_15 = extractelement <32 x i1> undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_24 = extractelement <32 x i1> undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_31 = extractelement <32 x i1> undef, i32 31
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = extractelement <64 x i1> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_7 = extractelement <64 x i1> undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_8 = extractelement <64 x i1> undef, i32 8
@@ -1030,27 +1030,27 @@ define i32 @extract_i1(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'extract_i1'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_7 = extractelement <32 x i1> undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_8 = extractelement <32 x i1> undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_15 = extractelement <32 x i1> undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_24 = extractelement <32 x i1> undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_31 = extractelement <32 x i1> undef, i32 31
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = extractelement <64 x i1> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_7 = extractelement <64 x i1> undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_8 = extractelement <64 x i1> undef, i32 8
@@ -1063,27 +1063,27 @@ define i32 @extract_i1(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'extract_i1'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_7 = extractelement <32 x i1> undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_8 = extractelement <32 x i1> undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_15 = extractelement <32 x i1> undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_24 = extractelement <32 x i1> undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_31 = extractelement <32 x i1> undef, i32 31
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = extractelement <64 x i1> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_7 = extractelement <64 x i1> undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_8 = extractelement <64 x i1> undef, i32 8
@@ -1096,27 +1096,27 @@ define i32 @extract_i1(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'extract_i1'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = extractelement <32 x i1> undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = extractelement <32 x i1> undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = extractelement <32 x i1> undef, i32 15
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = extractelement <32 x i1> undef, i32 24
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = extractelement <32 x i1> undef, i32 31
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = extractelement <64 x i1> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = extractelement <64 x i1> undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = extractelement <64 x i1> undef, i32 8
@@ -1129,27 +1129,27 @@ define i32 @extract_i1(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'extract_i1'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = extractelement <32 x i1> undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = extractelement <32 x i1> undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = extractelement <32 x i1> undef, i32 15
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_24 = extractelement <32 x i1> undef, i32 24
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_31 = extractelement <32 x i1> undef, i32 31
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = extractelement <64 x i1> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = extractelement <64 x i1> undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = extractelement <64 x i1> undef, i32 8
@@ -1161,61 +1161,94 @@ define i32 @extract_i1(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_63 = extractelement <64 x i1> undef, i32 63
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; AVX512-LABEL: 'extract_i1'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = extractelement <32 x i1> undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = extractelement <32 x i1> undef, i32 8
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = extractelement <32 x i1> undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = extractelement <32 x i1> undef, i32 24
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = extractelement <32 x i1> undef, i32 31
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = extractelement <64 x i1> undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = extractelement <64 x i1> undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = extractelement <64 x i1> undef, i32 8
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = extractelement <64 x i1> undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = extractelement <64 x i1> undef, i32 24
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = extractelement <64 x i1> undef, i32 31
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = extractelement <64 x i1> undef, i32 32
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = extractelement <64 x i1> undef, i32 48
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = extractelement <64 x i1> undef, i32 63
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX512F-LABEL: 'extract_i1'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = extractelement <32 x i1> undef, i32 7
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = extractelement <32 x i1> undef, i32 8
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = extractelement <32 x i1> undef, i32 15
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = extractelement <32 x i1> undef, i32 24
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = extractelement <32 x i1> undef, i32 31
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = extractelement <64 x i1> undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = extractelement <64 x i1> undef, i32 7
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = extractelement <64 x i1> undef, i32 8
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = extractelement <64 x i1> undef, i32 15
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = extractelement <64 x i1> undef, i32 24
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = extractelement <64 x i1> undef, i32 31
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = extractelement <64 x i1> undef, i32 32
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = extractelement <64 x i1> undef, i32 48
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = extractelement <64 x i1> undef, i32 63
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'extract_i1'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = extractelement <32 x i1> undef, i32 7
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = extractelement <32 x i1> undef, i32 8
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = extractelement <32 x i1> undef, i32 15
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = extractelement <32 x i1> undef, i32 24
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = extractelement <32 x i1> undef, i32 31
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = extractelement <64 x i1> undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = extractelement <64 x i1> undef, i32 7
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = extractelement <64 x i1> undef, i32 8
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = extractelement <64 x i1> undef, i32 15
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = extractelement <64 x i1> undef, i32 24
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = extractelement <64 x i1> undef, i32 31
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = extractelement <64 x i1> undef, i32 32
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = extractelement <64 x i1> undef, i32 48
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = extractelement <64 x i1> undef, i32 63
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'extract_i1'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_7 = extractelement <32 x i1> undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_8 = extractelement <32 x i1> undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_15 = extractelement <32 x i1> undef, i32 15
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_24 = extractelement <32 x i1> undef, i32 24
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_31 = extractelement <32 x i1> undef, i32 31
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = extractelement <64 x i1> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_7 = extractelement <64 x i1> undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_8 = extractelement <64 x i1> undef, i32 8
@@ -1228,27 +1261,27 @@ define i32 @extract_i1(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'extract_i1'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = extractelement <32 x i1> undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = extractelement <32 x i1> undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = extractelement <32 x i1> undef, i32 15
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = extractelement <32 x i1> undef, i32 24
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = extractelement <32 x i1> undef, i32 31
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = extractelement <64 x i1> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = extractelement <64 x i1> undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = extractelement <64 x i1> undef, i32 8

diff  --git a/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
index f6f5ed8029b1..ef136186927b 100644
--- a/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
@@ -6,8 +6,8 @@
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE4
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
 ;
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM
@@ -15,13 +15,13 @@
 
 define i32 @insert_double(i32 %arg) {
 ; SSE-LABEL: 'insert_double'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> poison, double undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> poison, double undef, i32 1
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> poison, double undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = insertelement <4 x double> poison, double undef, i32 3
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> poison, double undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = insertelement <8 x double> poison, double undef, i32 3
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> poison, double undef, i32 4
@@ -29,13 +29,13 @@ define i32 @insert_double(i32 %arg) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_double'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> poison, double undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> poison, double undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> poison, double undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_3 = insertelement <4 x double> poison, double undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> poison, double undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_3 = insertelement <8 x double> poison, double undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> poison, double undef, i32 4
@@ -43,13 +43,13 @@ define i32 @insert_double(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_double'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> poison, double undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> poison, double undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> poison, double undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_3 = insertelement <4 x double> poison, double undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> poison, double undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_3 = insertelement <8 x double> poison, double undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_4 = insertelement <8 x double> poison, double undef, i32 4
@@ -75,18 +75,18 @@ define i32 @insert_double(i32 %arg) {
 
 define i32 @insert_float(i32 %arg) {
 ; SSE2-LABEL: 'insert_float'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8
@@ -94,18 +94,18 @@ define i32 @insert_float(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_float'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8
@@ -113,18 +113,18 @@ define i32 @insert_float(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_float'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8
@@ -132,18 +132,18 @@ define i32 @insert_float(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_float'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8
@@ -151,18 +151,18 @@ define i32 @insert_float(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_float'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8
@@ -170,18 +170,18 @@ define i32 @insert_float(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_float'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8
@@ -189,18 +189,18 @@ define i32 @insert_float(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_float'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8
@@ -208,18 +208,18 @@ define i32 @insert_float(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_float'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8
@@ -251,13 +251,13 @@ define i32 @insert_float(i32 %arg) {
 
 define i32 @insert_i64(i32 %arg) {
 ; SSE2-LABEL: 'insert_i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4
@@ -265,13 +265,13 @@ define i32 @insert_i64(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i64'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4
@@ -279,13 +279,13 @@ define i32 @insert_i64(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i64'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4
@@ -293,13 +293,13 @@ define i32 @insert_i64(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i64'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4
@@ -307,13 +307,13 @@ define i32 @insert_i64(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4
@@ -321,13 +321,13 @@ define i32 @insert_i64(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4
@@ -335,13 +335,13 @@ define i32 @insert_i64(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i64'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4
@@ -349,13 +349,13 @@ define i32 @insert_i64(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i64'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4
@@ -381,18 +381,18 @@ define i32 @insert_i64(i32 %arg) {
 
 define i32 @insert_i32(i32 %arg) {
 ; SSE2-LABEL: 'insert_i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
@@ -400,18 +400,18 @@ define i32 @insert_i32(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i32'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
@@ -419,18 +419,18 @@ define i32 @insert_i32(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i32'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
@@ -438,18 +438,18 @@ define i32 @insert_i32(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i32'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
@@ -457,18 +457,18 @@ define i32 @insert_i32(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
@@ -476,18 +476,18 @@ define i32 @insert_i32(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
@@ -495,18 +495,18 @@ define i32 @insert_i32(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i32'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
@@ -514,18 +514,18 @@ define i32 @insert_i32(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i32'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
@@ -557,21 +557,21 @@ define i32 @insert_i32(i32 %arg) {
 
 define i32 @insert_i16(i32 %arg) {
 ; SSE-LABEL: 'insert_i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8
@@ -582,21 +582,21 @@ define i32 @insert_i16(i32 %arg) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8
@@ -607,21 +607,21 @@ define i32 @insert_i16(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8
@@ -663,27 +663,27 @@ define i32 @insert_i16(i32 %arg) {
 
 define i32 @insert_i8(i32 %arg) {
 ; SSE2-LABEL: 'insert_i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
@@ -696,27 +696,27 @@ define i32 @insert_i8(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i8'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
@@ -729,27 +729,27 @@ define i32 @insert_i8(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i8'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
@@ -762,27 +762,27 @@ define i32 @insert_i8(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i8'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
@@ -795,27 +795,27 @@ define i32 @insert_i8(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
@@ -828,27 +828,27 @@ define i32 @insert_i8(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
@@ -861,27 +861,27 @@ define i32 @insert_i8(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i8'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
@@ -894,27 +894,27 @@ define i32 @insert_i8(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i8'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
@@ -967,27 +967,27 @@ define i32 @insert_i8(i32 %arg) {
 
 define i32 @insert_i1(i32 %arg) {
 ; SSE2-LABEL: 'insert_i1'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
@@ -1000,27 +1000,27 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i1'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
@@ -1033,27 +1033,27 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i1'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
@@ -1066,27 +1066,27 @@ define i32 @insert_i1(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i1'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
@@ -1099,27 +1099,27 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i1'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
@@ -1131,61 +1131,94 @@ define i32 @insert_i1(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; AVX512-LABEL: 'insert_i1'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX512F-LABEL: 'insert_i1'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'insert_i1'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i1'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
@@ -1198,27 +1231,27 @@ define i32 @insert_i1(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i1'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8

diff  --git a/llvm/test/Analysis/CostModel/X86/vector-insert.ll b/llvm/test/Analysis/CostModel/X86/vector-insert.ll
index 08e3520d9bdf..054ffae46dc3 100644
--- a/llvm/test/Analysis/CostModel/X86/vector-insert.ll
+++ b/llvm/test/Analysis/CostModel/X86/vector-insert.ll
@@ -6,8 +6,8 @@
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE4
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
 ;
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM
@@ -15,13 +15,13 @@
 
 define i32 @insert_double(i32 %arg) {
 ; SSE-LABEL: 'insert_double'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = insertelement <8 x double> undef, double undef, i32 3
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4
@@ -29,13 +29,13 @@ define i32 @insert_double(i32 %arg) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_double'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_3 = insertelement <8 x double> undef, double undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4
@@ -43,13 +43,13 @@ define i32 @insert_double(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_double'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_3 = insertelement <8 x double> undef, double undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4
@@ -75,18 +75,18 @@ define i32 @insert_double(i32 %arg) {
 
 define i32 @insert_float(i32 %arg) {
 ; SSE2-LABEL: 'insert_float'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8
@@ -94,18 +94,18 @@ define i32 @insert_float(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_float'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8
@@ -113,18 +113,18 @@ define i32 @insert_float(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_float'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8
@@ -132,18 +132,18 @@ define i32 @insert_float(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_float'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8
@@ -151,18 +151,18 @@ define i32 @insert_float(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_float'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8
@@ -170,18 +170,18 @@ define i32 @insert_float(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_float'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8
@@ -189,18 +189,18 @@ define i32 @insert_float(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_float'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8
@@ -208,18 +208,18 @@ define i32 @insert_float(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_float'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8
@@ -251,13 +251,13 @@ define i32 @insert_float(i32 %arg) {
 
 define i32 @insert_i64(i32 %arg) {
 ; SSE2-LABEL: 'insert_i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
@@ -265,13 +265,13 @@ define i32 @insert_i64(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i64'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
@@ -279,13 +279,13 @@ define i32 @insert_i64(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i64'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
@@ -293,13 +293,13 @@ define i32 @insert_i64(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i64'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
@@ -307,13 +307,13 @@ define i32 @insert_i64(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
@@ -321,13 +321,13 @@ define i32 @insert_i64(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
@@ -335,13 +335,13 @@ define i32 @insert_i64(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i64'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
@@ -349,13 +349,13 @@ define i32 @insert_i64(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i64'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
@@ -381,18 +381,18 @@ define i32 @insert_i64(i32 %arg) {
 
 define i32 @insert_i32(i32 %arg) {
 ; SSE2-LABEL: 'insert_i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
@@ -400,18 +400,18 @@ define i32 @insert_i32(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i32'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
@@ -419,18 +419,18 @@ define i32 @insert_i32(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i32'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
@@ -438,18 +438,18 @@ define i32 @insert_i32(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i32'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
@@ -457,18 +457,18 @@ define i32 @insert_i32(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
@@ -476,18 +476,18 @@ define i32 @insert_i32(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
@@ -495,18 +495,18 @@ define i32 @insert_i32(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i32'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
@@ -514,18 +514,18 @@ define i32 @insert_i32(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i32'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
@@ -557,21 +557,21 @@ define i32 @insert_i32(i32 %arg) {
 
 define i32 @insert_i16(i32 %arg) {
 ; SSE-LABEL: 'insert_i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 undef, i32 1
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> undef, i16 undef, i32 3
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8
@@ -582,21 +582,21 @@ define i32 @insert_i16(i32 %arg) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> undef, i16 undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8
@@ -607,21 +607,21 @@ define i32 @insert_i16(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> undef, i16 undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8
@@ -663,27 +663,27 @@ define i32 @insert_i16(i32 %arg) {
 
 define i32 @insert_i8(i32 %arg) {
 ; SSE2-LABEL: 'insert_i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
@@ -696,27 +696,27 @@ define i32 @insert_i8(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i8'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
@@ -729,27 +729,27 @@ define i32 @insert_i8(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i8'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
@@ -762,27 +762,27 @@ define i32 @insert_i8(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i8'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
@@ -795,27 +795,27 @@ define i32 @insert_i8(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
@@ -828,27 +828,27 @@ define i32 @insert_i8(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
@@ -861,27 +861,27 @@ define i32 @insert_i8(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i8'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
@@ -894,27 +894,27 @@ define i32 @insert_i8(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i8'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
@@ -967,27 +967,27 @@ define i32 @insert_i8(i32 %arg) {
 
 define i32 @insert_i1(i32 %arg) {
 ; SSE2-LABEL: 'insert_i1'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
@@ -1000,27 +1000,27 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i1'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
@@ -1033,27 +1033,27 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i1'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
@@ -1066,27 +1066,27 @@ define i32 @insert_i1(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i1'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
@@ -1099,27 +1099,27 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i1'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
@@ -1131,61 +1131,94 @@ define i32 @insert_i1(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; AVX512-LABEL: 'insert_i1'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX512F-LABEL: 'insert_i1'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'insert_i1'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i1'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
@@ -1198,27 +1231,27 @@ define i32 @insert_i1(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i1'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8


        


More information about the llvm-commits mailing list