[llvm] aca5f9a - [CostModel][X86] getMemoryOpCost - increase cost of sub-32-bit vector load/stores

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 23 13:58:28 PDT 2023


Author: Simon Pilgrim
Date: 2023-04-23T21:48:25+01:00
New Revision: aca5f9aeea8da8857235347ed1363ccda5460cbb

URL: https://github.com/llvm/llvm-project/commit/aca5f9aeea8da8857235347ed1363ccda5460cbb
DIFF: https://github.com/llvm/llvm-project/commit/aca5f9aeea8da8857235347ed1363ccda5460cbb.diff

LOG: [CostModel][X86] getMemoryOpCost - increase cost of sub-32-bit vector load/stores

For 8-bit/16-bit vector loads/stores, we either scalarize and transfer to/from the vector unit or use the (usually slow) PINSR/PEXTR instructions.

Fixes #59867

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll
    llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll
    llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll
    llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll
    llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll
    llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll
    llvm/test/Analysis/CostModel/X86/load_store.ll
    llvm/test/Analysis/CostModel/X86/shuffle-load.ll
    llvm/test/Analysis/CostModel/X86/vector-extract.ll
    llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vector-insert-value.ll
    llvm/test/Analysis/CostModel/X86/vector-insert.ll
    llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr59867.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 158023fafa801..784c80301991c 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -4861,8 +4861,12 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
       // This isn't exactly right. We're using slow unaligned 32-byte accesses
       // as a proxy for a double-pumped AVX memory interface such as on
       // Sandybridge.
+      // Sub-32-bit loads/stores will be slower either with PINSR*/PEXTR* or
+      // will be scalarized.
       if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
         Cost += 2;
+      else if (CurrOpSizeBytes < 4)
+        Cost += 2;
       else
         Cost += 1;
 

diff  --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll
index 92972c4bce96b..6f8c3056b7aa7 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll
@@ -15,14 +15,14 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
+; SSE2:  LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
 ; SSE2:  LV: Found an estimated cost of 50 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
 ; SSE2:  LV: Found an estimated cost of 93 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
 ; SSE2:  LV: Found an estimated cost of 189 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX1:  LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX1:  LV: Found an estimated cost of 27 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX1:  LV: Found an estimated cost of 52 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX1:  LV: Found an estimated cost of 99 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
@@ -30,7 +30,7 @@ define void @test() {
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2:  LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX2:  LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX2:  LV: Found an estimated cost of 13 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
@@ -38,7 +38,7 @@ define void @test() {
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX512DQ:  LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 14 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1

diff  --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll
index febfeeae84719..53df030a9ba7a 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll
@@ -15,14 +15,14 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 37 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
+; SSE2:  LV: Found an estimated cost of 38 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
 ; SSE2:  LV: Found an estimated cost of 75 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
 ; SSE2:  LV: Found an estimated cost of 155 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
 ; SSE2:  LV: Found an estimated cost of 315 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX1:  LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX1:  LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX1:  LV: Found an estimated cost of 83 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX1:  LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
@@ -38,7 +38,7 @@ define void @test() {
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX512DQ:  LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1

diff  --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll
index e8db1b2af8b13..15f1e021eb32e 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll
@@ -15,14 +15,14 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 56 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
+; SSE2:  LV: Found an estimated cost of 57 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
 ; SSE2:  LV: Found an estimated cost of 110 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
 ; SSE2:  LV: Found an estimated cost of 217 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
 ; SSE2:  LV: Found an estimated cost of 441 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX1:  LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX1:  LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX1:  LV: Found an estimated cost of 118 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX1:  LV: Found an estimated cost of 231 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
@@ -38,7 +38,7 @@ define void @test() {
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX512DQ:  LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 233 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1

diff  --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll
index e729e6227b5a8..2cfd81ac65d5b 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll
@@ -15,14 +15,14 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; SSE2:  LV: Found an estimated cost of 25 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
+; SSE2:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
 ; SSE2:  LV: Found an estimated cost of 52 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
 ; SSE2:  LV: Found an estimated cost of 101 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
 ; SSE2:  LV: Found an estimated cost of 204 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX1:  LV: Found an estimated cost of 15 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
+; AVX1:  LV: Found an estimated cost of 16 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
 ; AVX1:  LV: Found an estimated cost of 27 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
 ; AVX1:  LV: Found an estimated cost of 53 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
 ; AVX1:  LV: Found an estimated cost of 100 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
@@ -30,7 +30,7 @@ define void @test() {
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX2:  LV: Found an estimated cost of 7 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
+; AVX2:  LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
 ; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
 ; AVX2:  LV: Found an estimated cost of 9 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
 ; AVX2:  LV: Found an estimated cost of 13 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
@@ -38,7 +38,7 @@ define void @test() {
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 7 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
+; AVX512DQ:  LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 7 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 9 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 14 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1

diff  --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll
index 2e50c41d04e0c..eef594d855d3d 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll
@@ -15,14 +15,14 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; SSE2:  LV: Found an estimated cost of 43 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
+; SSE2:  LV: Found an estimated cost of 44 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
 ; SSE2:  LV: Found an estimated cost of 87 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
 ; SSE2:  LV: Found an estimated cost of 178 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
 ; SSE2:  LV: Found an estimated cost of 360 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX1:  LV: Found an estimated cost of 23 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
+; AVX1:  LV: Found an estimated cost of 24 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
 ; AVX1:  LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
 ; AVX1:  LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
 ; AVX1:  LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
@@ -30,7 +30,7 @@ define void @test() {
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX2:  LV: Found an estimated cost of 23 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
+; AVX2:  LV: Found an estimated cost of 24 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
 ; AVX2:  LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
 ; AVX2:  LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
 ; AVX2:  LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
@@ -38,7 +38,7 @@ define void @test() {
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 23 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
+; AVX512DQ:  LV: Found an estimated cost of 24 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 87 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1

diff  --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll
index e4bbad11ffbef..5c7a6a8796234 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll
@@ -15,14 +15,14 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; SSE2:  LV: Found an estimated cost of 56 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
+; SSE2:  LV: Found an estimated cost of 57 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
 ; SSE2:  LV: Found an estimated cost of 112 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
 ; SSE2:  LV: Found an estimated cost of 225 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
 ; SSE2:  LV: Found an estimated cost of 456 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX1:  LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
+; AVX1:  LV: Found an estimated cost of 34 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
 ; AVX1:  LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
 ; AVX1:  LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
 ; AVX1:  LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
@@ -30,7 +30,7 @@ define void @test() {
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX2:  LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
+; AVX2:  LV: Found an estimated cost of 34 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
 ; AVX2:  LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
 ; AVX2:  LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
 ; AVX2:  LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
@@ -38,7 +38,7 @@ define void @test() {
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
+; AVX512DQ:  LV: Found an estimated cost of 34 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
 ; AVX512DQ:  LV: Found an estimated cost of 234 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1

diff  --git a/llvm/test/Analysis/CostModel/X86/load_store.ll b/llvm/test/Analysis/CostModel/X86/load_store.ll
index 9183cb1bdf39a..ef36bd7070724 100644
--- a/llvm/test/Analysis/CostModel/X86/load_store.ll
+++ b/llvm/test/Analysis/CostModel/X86/load_store.ll
@@ -30,10 +30,10 @@ define i32 @stores_align4(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 4
@@ -81,10 +81,10 @@ define i32 @stores_align4(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 4
@@ -132,10 +132,10 @@ define i32 @stores_align4(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <256 x i1> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, ptr undef, align 4
@@ -183,11 +183,11 @@ define i32 @stores_align4(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i1> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <256 x i1> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, ptr undef, align 4
@@ -324,151 +324,151 @@ define i32 @stores_partial_align4(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <7 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <15 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <22 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <23 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <31 x i16> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <3 x i8> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <5 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <7 x i8> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <11 x i8> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <12 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <13 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <13 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <14 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <15 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <19 x i8> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <23 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <23 x i8> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <27 x i8> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <31 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <29 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <31 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <35 x i8> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <37 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <39 x i8> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <43 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <41 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <43 x i8> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <44 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <45 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <45 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <46 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: store <47 x i8> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <51 x i8> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <55 x i8> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <57 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <59 x i8> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <61 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <12 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <15 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, ptr undef, align 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <61 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <62 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: store <63 x i8> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <7 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <9 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <10 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <12 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <20 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <23 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <24 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <25 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <26 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <28 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <41 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <42 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <44 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <57 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <58 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <59 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <60 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i1> undef, ptr undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <63 x i1> undef, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'stores_partial_align4'
@@ -510,151 +510,151 @@ define i32 @stores_partial_align4(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <3 x i8> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <7 x i8> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i8> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i8> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <30 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <31 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <35 x i8> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <39 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <39 x i8> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <43 x i8> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <45 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <51 x i8> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <55 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <55 x i8> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <59 x i8> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <27 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <29 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <31 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <57 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <58 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <59 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <60 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <61 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <62 x i1> undef, ptr undef, align 4
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <63 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <7 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <10 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <12 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <20 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <23 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <24 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <25 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <26 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <28 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <44 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <46 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <47 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <57 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <58 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <59 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <60 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <61 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <62 x i1> undef, ptr undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <63 x i1> undef, ptr undef, align 4
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'stores_partial_align4'
@@ -696,151 +696,151 @@ define i32 @stores_partial_align4(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <15 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <3 x i8> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <7 x i8> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i8> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <18 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <19 x i8> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <21 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <22 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <23 x i8> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <26 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <27 x i8> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <33 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <35 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <31 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <37 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <41 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <43 x i8> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <46 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <47 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <47 x i8> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <49 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <50 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <51 x i8> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <52 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <54 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <55 x i8> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <59 x i8> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <49 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <51 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <53 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <54 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <55 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, ptr undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <7 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <10 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <12 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <18 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <20 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <23 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <24 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <25 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <26 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <28 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <44 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <46 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <47 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <49 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <50 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <52 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <54 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <55 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <56 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <57 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <58 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <59 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <60 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i1> undef, ptr undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <63 x i1> undef, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'stores_partial_align4'
@@ -882,151 +882,151 @@ define i32 @stores_partial_align4(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x float> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <15 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <19 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <25 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <26 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <27 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <31 x i16> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <3 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <7 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <18 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <19 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <21 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <22 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <23 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <26 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <27 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <31 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <33 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <34 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <35 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <36 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <37 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <38 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <39 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <43 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <45 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <49 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <50 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <51 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <49 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <50 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <51 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <52 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <54 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <55 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <53 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <54 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <55 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <56 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <57 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <58 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <59 x i8> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <63 x i8> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <59 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <60 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <61 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <62 x i1> undef, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <63 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <61 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <62 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: store <63 x i8> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <7 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <10 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <12 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <18 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <20 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <23 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <24 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <25 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <26 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <28 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <34 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <35 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <36 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <37 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <38 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <39 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <40 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <41 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <42 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <44 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <48 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <49 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <50 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <51 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <52 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <54 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <55 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <56 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <57 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <58 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <59 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <60 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <61 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <62 x i1> undef, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <63 x i1> undef, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   store <1 x i64> undef, ptr undef, align 4
@@ -1276,10 +1276,10 @@ define i32 @stores_align1(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 1
@@ -1327,10 +1327,10 @@ define i32 @stores_align1(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 1
@@ -1378,10 +1378,10 @@ define i32 @stores_align1(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <256 x i1> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, ptr undef, align 1
@@ -1429,11 +1429,11 @@ define i32 @stores_align1(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i1> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <256 x i1> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, ptr undef, align 1
@@ -1570,151 +1570,151 @@ define i32 @stores_partial_align1(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <7 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <15 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <22 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <23 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <31 x i16> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <3 x i8> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <5 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <7 x i8> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <11 x i8> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <12 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <13 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <13 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <14 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <15 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <19 x i8> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <23 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <23 x i8> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <27 x i8> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <31 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <29 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <31 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <35 x i8> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <37 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <39 x i8> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <43 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <41 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <43 x i8> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <44 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <45 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <45 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <46 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: store <47 x i8> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <51 x i8> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <55 x i8> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <57 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <59 x i8> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <61 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <12 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <15 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <61 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <62 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: store <63 x i8> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <7 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <9 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <10 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <12 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <20 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <23 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <24 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <25 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <26 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <28 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <41 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <42 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <44 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <57 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <58 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <59 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <60 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i1> undef, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <63 x i1> undef, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'stores_partial_align1'
@@ -1756,151 +1756,151 @@ define i32 @stores_partial_align1(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <3 x i8> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <7 x i8> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i8> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i8> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <30 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <31 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <35 x i8> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <39 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <39 x i8> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <43 x i8> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <45 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <51 x i8> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <55 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <55 x i8> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <59 x i8> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <27 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <29 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <31 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <57 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <58 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <59 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <60 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <61 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <62 x i1> undef, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <63 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <7 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <10 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <12 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <20 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <23 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <24 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <25 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <26 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <28 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <44 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <46 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <47 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <57 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <58 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <59 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <60 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <61 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <62 x i1> undef, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <63 x i1> undef, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'stores_partial_align1'
@@ -1942,151 +1942,151 @@ define i32 @stores_partial_align1(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <15 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <3 x i8> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <7 x i8> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i8> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <18 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <19 x i8> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <21 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <22 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <23 x i8> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <26 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <27 x i8> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <33 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <35 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <31 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <37 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <41 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <43 x i8> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <46 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <47 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <47 x i8> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <49 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <50 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <51 x i8> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <52 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <54 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <55 x i8> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <59 x i8> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <49 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <51 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <53 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <54 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <55 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <7 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <10 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <12 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <18 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <20 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <23 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <24 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <25 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <26 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <28 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <44 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <46 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <47 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <49 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <50 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <52 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <54 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <55 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <56 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <57 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <58 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <59 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <60 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i1> undef, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <63 x i1> undef, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'stores_partial_align1'
@@ -2128,151 +2128,151 @@ define i32 @stores_partial_align1(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x float> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <15 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <19 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <25 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <26 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <27 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <31 x i16> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <3 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <7 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <18 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <19 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <21 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <22 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <23 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <26 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <27 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <31 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <33 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <34 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <35 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <36 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <37 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <38 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <39 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <43 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <45 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <49 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <50 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <51 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <49 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <50 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <51 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <52 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <54 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <55 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <53 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <54 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <55 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <56 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <57 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <58 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <59 x i8> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <63 x i8> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <59 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <60 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <61 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <62 x i1> undef, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <63 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <61 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <62 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: store <63 x i8> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <7 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <10 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <12 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <18 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <20 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <23 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <24 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <25 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <26 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <28 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <34 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <35 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <36 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <37 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <38 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <39 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <40 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <41 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <42 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <44 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <48 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <49 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <50 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <51 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <52 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <54 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <55 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <56 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <57 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <58 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <59 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <60 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <61 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <62 x i1> undef, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <63 x i1> undef, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   store <1 x i64> undef, ptr undef, align 1
@@ -2522,10 +2522,10 @@ define i32 @stores_align64(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 64
@@ -2573,10 +2573,10 @@ define i32 @stores_align64(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 64
@@ -2624,10 +2624,10 @@ define i32 @stores_align64(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <256 x i1> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, ptr undef, align 64
@@ -2675,11 +2675,11 @@ define i32 @stores_align64(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i1> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <256 x i1> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, ptr undef, align 64
@@ -2816,151 +2816,151 @@ define i32 @stores_partial_align64(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <7 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <15 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <22 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <23 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <31 x i16> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <3 x i8> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <5 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <7 x i8> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <11 x i8> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <12 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <13 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <13 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <14 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <15 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <19 x i8> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <23 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <23 x i8> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <27 x i8> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <31 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <29 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <31 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <35 x i8> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <37 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <39 x i8> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <43 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <41 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <43 x i8> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <44 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <45 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <45 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <46 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: store <47 x i8> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <51 x i8> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <55 x i8> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <57 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <59 x i8> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <61 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <12 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <15 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, ptr undef, align 64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <61 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <62 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: store <63 x i8> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <7 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <9 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <10 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <12 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <20 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <23 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <24 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <25 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <26 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <28 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <41 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <42 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <44 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <57 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <58 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <59 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <60 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i1> undef, ptr undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <63 x i1> undef, ptr undef, align 64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'stores_partial_align64'
@@ -3002,151 +3002,151 @@ define i32 @stores_partial_align64(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <3 x i8> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <7 x i8> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i8> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i8> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <30 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <31 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <35 x i8> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <39 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <39 x i8> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <43 x i8> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <45 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <51 x i8> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <55 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <55 x i8> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <59 x i8> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <27 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <29 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <31 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <57 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <58 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <59 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <60 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <61 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <62 x i1> undef, ptr undef, align 64
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <63 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <7 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <10 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <12 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <20 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <23 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <24 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <25 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <26 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <28 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <44 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <46 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <47 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <57 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <58 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <59 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <60 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <61 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <62 x i1> undef, ptr undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <63 x i1> undef, ptr undef, align 64
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'stores_partial_align64'
@@ -3188,151 +3188,151 @@ define i32 @stores_partial_align64(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <17 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <15 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <3 x i8> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <7 x i8> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i8> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <18 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <19 x i8> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <21 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <22 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <23 x i8> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <26 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <27 x i8> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <33 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <35 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <31 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <37 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <41 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <43 x i8> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <46 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <47 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <47 x i8> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <49 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <50 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <51 x i8> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <52 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <54 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <55 x i8> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <59 x i8> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <49 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <51 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <53 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <54 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <55 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, ptr undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <7 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <10 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <12 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <18 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <20 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <23 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <24 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <25 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <26 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <28 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <44 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <46 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <47 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <49 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <50 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <52 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <54 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <55 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <56 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <57 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <58 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <59 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <60 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i1> undef, ptr undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <63 x i1> undef, ptr undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'stores_partial_align64'
@@ -3374,151 +3374,151 @@ define i32 @stores_partial_align64(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x float> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <15 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <19 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <25 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <26 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <27 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <31 x i16> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <3 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <7 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <10 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <11 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <13 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <18 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <19 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <21 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <22 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <23 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <25 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <26 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <27 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <31 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <33 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <34 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <35 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <36 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <37 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <38 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <39 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <42 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <43 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <45 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <49 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <50 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <51 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <49 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <50 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <51 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <52 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <54 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <55 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <53 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <54 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: store <55 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <56 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <57 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <58 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <59 x i8> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: store <63 x i8> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <58 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <59 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <60 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <61 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <62 x i1> undef, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <63 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <61 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <62 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: store <63 x i8> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <7 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <9 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <10 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <12 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <14 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <17 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <18 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <20 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <22 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <23 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <24 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <25 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <26 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <27 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <28 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <29 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <30 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <31 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <34 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <35 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <36 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <37 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <38 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <39 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <40 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <41 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <42 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <44 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <46 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <48 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <49 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <50 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <51 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <52 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <54 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <55 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <56 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <57 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <58 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <59 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <60 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <61 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <62 x i1> undef, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <63 x i1> undef, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   store <1 x i64> undef, ptr undef, align 64
@@ -3921,11 +3921,11 @@ define i32 @loads_align4(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <16 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <8 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = load <4 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = load <2 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = load <1 x i1>, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %26 = load <256 x i1>, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, ptr undef, align 4
@@ -4708,17 +4708,17 @@ define i32 @loads_partial_align4(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %124 = load <61 x i8>, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %125 = load <62 x i8>, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %126 = load <63 x i8>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, ptr undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %127 = load <3 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %128 = load <5 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %129 = load <6 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %130 = load <7 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %131 = load <9 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %132 = load <10 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %133 = load <11 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %134 = load <12 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %135 = load <13 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %136 = load <14 x i1>, ptr undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %137 = load <15 x i1>, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, ptr undef, align 4
@@ -5014,10 +5014,10 @@ define i32 @loads_align1(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <16 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <8 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = load <4 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = load <2 x i1>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, ptr undef, align 1
@@ -5065,10 +5065,10 @@ define i32 @loads_align1(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <16 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <8 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = load <4 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = load <2 x i1>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, ptr undef, align 1
@@ -5116,10 +5116,10 @@ define i32 @loads_align1(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <16 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <8 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = load <4 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = load <2 x i1>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = load <256 x i1>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, ptr undef, align 1
@@ -5167,11 +5167,11 @@ define i32 @loads_align1(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <16 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <8 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = load <4 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = load <2 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = load <1 x i1>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %26 = load <256 x i1>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, ptr undef, align 1
@@ -5308,151 +5308,151 @@ define i32 @loads_partial_align1(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %36 = load <15 x float>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %39 = load <3 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %41 = load <5 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %42 = load <6 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %43 = load <7 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %43 = load <7 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %44 = load <9 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %46 = load <11 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %48 = load <13 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %49 = load <14 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %50 = load <15 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %50 = load <15 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %53 = load <19 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %55 = load <21 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %56 = load <22 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %57 = load <23 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %57 = load <23 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %59 = load <25 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %61 = load <27 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %64 = load <30 x i16>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %65 = load <31 x i16>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %65 = load <31 x i16>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %68 = load <3 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %67 = load <2 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %68 = load <3 x i8>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %70 = load <5 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %72 = load <7 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %70 = load <5 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %71 = load <6 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %72 = load <7 x i8>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %74 = load <9 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %76 = load <11 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %74 = load <9 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %75 = load <10 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %76 = load <11 x i8>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %77 = load <12 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %78 = load <13 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %79 = load <14 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %80 = load <15 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %83 = load <19 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %78 = load <13 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %79 = load <14 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %80 = load <15 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %81 = load <17 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %82 = load <18 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %83 = load <19 x i8>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %85 = load <21 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %86 = load <22 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %87 = load <23 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %85 = load <21 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %86 = load <22 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %87 = load <23 x i8>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %89 = load <25 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %90 = load <26 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %91 = load <27 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %89 = load <25 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %90 = load <26 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %91 = load <27 x i8>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %92 = load <28 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %93 = load <29 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %94 = load <30 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %95 = load <31 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %98 = load <35 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %93 = load <29 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %94 = load <30 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %95 = load <31 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %96 = load <33 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %97 = load <34 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %98 = load <35 x i8>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %100 = load <37 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %101 = load <38 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %102 = load <39 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %100 = load <37 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %101 = load <38 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %102 = load <39 x i8>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %104 = load <41 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %106 = load <43 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %104 = load <41 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %105 = load <42 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %106 = load <43 x i8>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %107 = load <44 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %108 = load <45 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %109 = load <46 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %110 = load <47 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %108 = load <45 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %109 = load <46 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %110 = load <47 x i8>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %114 = load <51 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %112 = load <49 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %113 = load <50 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %114 = load <51 x i8>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %116 = load <53 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %117 = load <54 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %118 = load <55 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %116 = load <53 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %117 = load <54 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %118 = load <55 x i8>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %120 = load <57 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %121 = load <58 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %122 = load <59 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %120 = load <57 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %121 = load <58 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %122 = load <59 x i8>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %123 = load <60 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %124 = load <61 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %125 = load <62 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %126 = load <63 x i8>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %131 = load <9 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %132 = load <10 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %133 = load <11 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %134 = load <12 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %135 = load <13 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %136 = load <14 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %137 = load <15 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %138 = load <17 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %139 = load <18 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %140 = load <19 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %141 = load <20 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %142 = load <21 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %143 = load <22 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %144 = load <23 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %145 = load <24 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %146 = load <25 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %147 = load <26 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %148 = load <27 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %149 = load <28 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %150 = load <29 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %151 = load <30 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %152 = load <31 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %161 = load <41 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %162 = load <42 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %163 = load <43 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %164 = load <44 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %165 = load <45 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %166 = load <46 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %167 = load <47 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %169 = load <49 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %170 = load <50 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %171 = load <51 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %172 = load <52 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %173 = load <53 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %174 = load <54 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %175 = load <55 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %176 = load <56 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %177 = load <57 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %178 = load <58 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %179 = load <59 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %180 = load <60 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %181 = load <61 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %182 = load <62 x i1>, ptr undef, align 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %183 = load <63 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %124 = load <61 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %125 = load <62 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %126 = load <63 x i8>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %127 = load <3 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %128 = load <5 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %129 = load <6 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %130 = load <7 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %131 = load <9 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %132 = load <10 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %133 = load <11 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %134 = load <12 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %135 = load <13 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %136 = load <14 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %137 = load <15 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %138 = load <17 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %139 = load <18 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %140 = load <19 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %141 = load <20 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %142 = load <21 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %143 = load <22 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %144 = load <23 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %145 = load <24 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %146 = load <25 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %147 = load <26 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %148 = load <27 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %149 = load <28 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %150 = load <29 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %151 = load <30 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %152 = load <31 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %153 = load <33 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %154 = load <34 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %155 = load <35 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %156 = load <36 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %157 = load <37 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %158 = load <38 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %159 = load <39 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %160 = load <40 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %161 = load <41 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %162 = load <42 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %163 = load <43 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %164 = load <44 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %165 = load <45 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %166 = load <46 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %167 = load <47 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %168 = load <48 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %169 = load <49 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %170 = load <50 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %171 = load <51 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %172 = load <52 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %173 = load <53 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %174 = load <54 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %175 = load <55 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %176 = load <56 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %177 = load <57 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %178 = load <58 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %179 = load <59 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %180 = load <60 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %181 = load <61 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %182 = load <62 x i1>, ptr undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %183 = load <63 x i1>, ptr undef, align 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'loads_partial_align1'
@@ -5494,151 +5494,151 @@ define i32 @loads_partial_align1(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %36 = load <15 x float>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %39 = load <3 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %41 = load <5 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %43 = load <7 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %44 = load <9 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %46 = load <11 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %48 = load <13 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %49 = load <14 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %50 = load <15 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %53 = load <19 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %55 = load <21 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %56 = load <22 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %57 = load <23 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %59 = load <25 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %61 = load <27 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %64 = load <30 x i16>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %65 = load <31 x i16>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %67 = load <2 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %68 = load <3 x i8>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %72 = load <7 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %70 = load <5 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %71 = load <6 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %72 = load <7 x i8>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %76 = load <11 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %74 = load <9 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %75 = load <10 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %76 = load <11 x i8>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %80 = load <15 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %83 = load <19 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %78 = load <13 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %79 = load <14 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %80 = load <15 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %81 = load <17 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %82 = load <18 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %83 = load <19 x i8>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %85 = load <21 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %86 = load <22 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %87 = load <23 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %85 = load <21 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %86 = load <22 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %87 = load <23 x i8>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %89 = load <25 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %90 = load <26 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %91 = load <27 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %89 = load <25 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %90 = load <26 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %91 = load <27 x i8>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %92 = load <28 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %93 = load <29 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %94 = load <30 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %95 = load <31 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %98 = load <35 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %94 = load <30 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %95 = load <31 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %96 = load <33 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %97 = load <34 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %98 = load <35 x i8>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %100 = load <37 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %101 = load <38 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %102 = load <39 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %100 = load <37 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %101 = load <38 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %102 = load <39 x i8>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %106 = load <43 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %104 = load <41 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %105 = load <42 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %106 = load <43 x i8>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %107 = load <44 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %109 = load <46 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %110 = load <47 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %108 = load <45 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %109 = load <46 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %110 = load <47 x i8>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %114 = load <51 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %112 = load <49 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %113 = load <50 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %114 = load <51 x i8>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %116 = load <53 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %117 = load <54 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %118 = load <55 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %116 = load <53 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %117 = load <54 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %118 = load <55 x i8>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %120 = load <57 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %121 = load <58 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %122 = load <59 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %120 = load <57 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %121 = load <58 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %122 = load <59 x i8>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %123 = load <60 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %125 = load <62 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %126 = load <63 x i8>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %131 = load <9 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %132 = load <10 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %133 = load <11 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %134 = load <12 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %135 = load <13 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %136 = load <14 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %137 = load <15 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %138 = load <17 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %139 = load <18 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %140 = load <19 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %141 = load <20 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %142 = load <21 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %143 = load <22 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %144 = load <23 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %145 = load <24 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %146 = load <25 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %147 = load <26 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %148 = load <27 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %149 = load <28 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %150 = load <29 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %151 = load <30 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %152 = load <31 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %161 = load <41 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %162 = load <42 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %163 = load <43 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %164 = load <44 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %165 = load <45 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %166 = load <46 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %167 = load <47 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %169 = load <49 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %170 = load <50 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %171 = load <51 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %172 = load <52 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %173 = load <53 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %174 = load <54 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %175 = load <55 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %176 = load <56 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %177 = load <57 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %178 = load <58 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %179 = load <59 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %180 = load <60 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %181 = load <61 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %182 = load <62 x i1>, ptr undef, align 1
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %183 = load <63 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %124 = load <61 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %125 = load <62 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %126 = load <63 x i8>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %127 = load <3 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %128 = load <5 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %129 = load <6 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %130 = load <7 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %131 = load <9 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %132 = load <10 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %133 = load <11 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %134 = load <12 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %135 = load <13 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %136 = load <14 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %137 = load <15 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %138 = load <17 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %139 = load <18 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %140 = load <19 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %141 = load <20 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %142 = load <21 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %143 = load <22 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %144 = load <23 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %145 = load <24 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %146 = load <25 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %147 = load <26 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %148 = load <27 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %149 = load <28 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %150 = load <29 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %151 = load <30 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %152 = load <31 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %153 = load <33 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %154 = load <34 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %155 = load <35 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %156 = load <36 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %157 = load <37 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %158 = load <38 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %159 = load <39 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %160 = load <40 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %161 = load <41 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %162 = load <42 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %163 = load <43 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %164 = load <44 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %165 = load <45 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %166 = load <46 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %167 = load <47 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %168 = load <48 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %169 = load <49 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %170 = load <50 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %171 = load <51 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %172 = load <52 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %173 = load <53 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %174 = load <54 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %175 = load <55 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %176 = load <56 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %177 = load <57 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %178 = load <58 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %179 = load <59 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %180 = load <60 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %181 = load <61 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %182 = load <62 x i1>, ptr undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %183 = load <63 x i1>, ptr undef, align 1
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'loads_partial_align1'
@@ -5680,151 +5680,151 @@ define i32 @loads_partial_align1(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %36 = load <15 x float>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %39 = load <3 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %41 = load <5 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %44 = load <9 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %43 = load <7 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %44 = load <9 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %45 = load <10 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %46 = load <11 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %47 = load <12 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %48 = load <13 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %48 = load <13 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %49 = load <14 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %51 = load <17 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %50 = load <15 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %52 = load <18 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %53 = load <19 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %54 = load <20 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %55 = load <21 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %56 = load <22 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %58 = load <24 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %59 = load <25 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %59 = load <25 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %60 = load <26 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %61 = load <27 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %61 = load <27 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %64 = load <30 x i16>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %65 = load <31 x i16>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %67 = load <2 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %68 = load <3 x i8>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %72 = load <7 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %70 = load <5 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %71 = load <6 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %72 = load <7 x i8>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %76 = load <11 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %74 = load <9 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %75 = load <10 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %76 = load <11 x i8>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %80 = load <15 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %81 = load <17 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %82 = load <18 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %83 = load <19 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %78 = load <13 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %79 = load <14 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %80 = load <15 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %81 = load <17 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %82 = load <18 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %83 = load <19 x i8>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %84 = load <20 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %86 = load <22 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %87 = load <23 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %85 = load <21 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %86 = load <22 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %87 = load <23 x i8>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %88 = load <24 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %89 = load <25 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %90 = load <26 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %89 = load <25 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %90 = load <26 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %91 = load <27 x i8>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %92 = load <28 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %94 = load <30 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %95 = load <31 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %96 = load <33 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %97 = load <34 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %98 = load <35 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %93 = load <29 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %94 = load <30 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %95 = load <31 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %98 = load <35 x i8>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %99 = load <36 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %100 = load <37 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %101 = load <38 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %102 = load <39 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %100 = load <37 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %101 = load <38 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %102 = load <39 x i8>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %103 = load <40 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %104 = load <41 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %105 = load <42 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %106 = load <43 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %106 = load <43 x i8>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %107 = load <44 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %108 = load <45 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %109 = load <46 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %110 = load <47 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %109 = load <46 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %110 = load <47 x i8>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %111 = load <48 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %112 = load <49 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %113 = load <50 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %114 = load <51 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %112 = load <49 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %113 = load <50 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %114 = load <51 x i8>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %115 = load <52 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %116 = load <53 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %117 = load <54 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %118 = load <55 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %116 = load <53 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %117 = load <54 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %118 = load <55 x i8>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %120 = load <57 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %121 = load <58 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %122 = load <59 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %120 = load <57 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %121 = load <58 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %122 = load <59 x i8>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %123 = load <60 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %125 = load <62 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %126 = load <63 x i8>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %131 = load <9 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %132 = load <10 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %133 = load <11 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %134 = load <12 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %135 = load <13 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %136 = load <14 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %137 = load <15 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %138 = load <17 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %139 = load <18 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %140 = load <19 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %141 = load <20 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %142 = load <21 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %143 = load <22 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %144 = load <23 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %145 = load <24 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %146 = load <25 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %147 = load <26 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %148 = load <27 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %149 = load <28 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %150 = load <29 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %151 = load <30 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %152 = load <31 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %161 = load <41 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %162 = load <42 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %163 = load <43 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %164 = load <44 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %165 = load <45 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %166 = load <46 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %167 = load <47 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %169 = load <49 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %170 = load <50 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %171 = load <51 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %172 = load <52 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %173 = load <53 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %174 = load <54 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %175 = load <55 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %176 = load <56 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %177 = load <57 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %178 = load <58 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %179 = load <59 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %180 = load <60 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %181 = load <61 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %182 = load <62 x i1>, ptr undef, align 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %183 = load <63 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %124 = load <61 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %125 = load <62 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %126 = load <63 x i8>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %127 = load <3 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %128 = load <5 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %129 = load <6 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %130 = load <7 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %131 = load <9 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %132 = load <10 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %133 = load <11 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %134 = load <12 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %135 = load <13 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %136 = load <14 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %137 = load <15 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %138 = load <17 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %139 = load <18 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %140 = load <19 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %141 = load <20 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %142 = load <21 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %143 = load <22 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %144 = load <23 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %145 = load <24 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %146 = load <25 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %147 = load <26 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %148 = load <27 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %149 = load <28 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %150 = load <29 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %151 = load <30 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %152 = load <31 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %153 = load <33 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %154 = load <34 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %155 = load <35 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %156 = load <36 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %157 = load <37 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %158 = load <38 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %159 = load <39 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %160 = load <40 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %161 = load <41 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %162 = load <42 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %163 = load <43 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %164 = load <44 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %165 = load <45 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %166 = load <46 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %167 = load <47 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %168 = load <48 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %169 = load <49 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %170 = load <50 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %171 = load <51 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %172 = load <52 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %173 = load <53 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %174 = load <54 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %175 = load <55 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %176 = load <56 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %177 = load <57 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %178 = load <58 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %179 = load <59 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %180 = load <60 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %181 = load <61 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %182 = load <62 x i1>, ptr undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %183 = load <63 x i1>, ptr undef, align 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'loads_partial_align1'
@@ -5866,151 +5866,151 @@ define i32 @loads_partial_align1(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %36 = load <15 x float>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %39 = load <3 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %41 = load <5 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %44 = load <9 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %43 = load <7 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %44 = load <9 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %45 = load <10 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %46 = load <11 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %47 = load <12 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %48 = load <13 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %48 = load <13 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %49 = load <14 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %50 = load <15 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %51 = load <17 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %52 = load <18 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %53 = load <19 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %53 = load <19 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %55 = load <21 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %56 = load <22 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %57 = load <23 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %59 = load <25 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %59 = load <25 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %60 = load <26 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %61 = load <27 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %61 = load <27 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %62 = load <28 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %63 = load <29 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %64 = load <30 x i16>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %65 = load <31 x i16>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %65 = load <31 x i16>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %67 = load <2 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %68 = load <3 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %72 = load <7 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %70 = load <5 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %71 = load <6 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %72 = load <7 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %76 = load <11 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %74 = load <9 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %75 = load <10 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %76 = load <11 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %80 = load <15 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %81 = load <17 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %82 = load <18 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %83 = load <19 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %78 = load <13 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %79 = load <14 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %80 = load <15 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %81 = load <17 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %82 = load <18 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %83 = load <19 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %84 = load <20 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %86 = load <22 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %87 = load <23 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %85 = load <21 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %86 = load <22 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %87 = load <23 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %88 = load <24 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %89 = load <25 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %90 = load <26 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %89 = load <25 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %90 = load <26 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %91 = load <27 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %92 = load <28 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %94 = load <30 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %95 = load <31 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %96 = load <33 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %97 = load <34 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %98 = load <35 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %93 = load <29 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %94 = load <30 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %95 = load <31 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %96 = load <33 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %97 = load <34 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %98 = load <35 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %99 = load <36 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %100 = load <37 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %101 = load <38 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %102 = load <39 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %100 = load <37 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %101 = load <38 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %102 = load <39 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %106 = load <43 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %104 = load <41 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %105 = load <42 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %106 = load <43 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %107 = load <44 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %109 = load <46 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %110 = load <47 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %108 = load <45 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %109 = load <46 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %110 = load <47 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %112 = load <49 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %113 = load <50 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %114 = load <51 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %112 = load <49 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %113 = load <50 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %114 = load <51 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %115 = load <52 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %116 = load <53 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %117 = load <54 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %118 = load <55 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %116 = load <53 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %117 = load <54 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %118 = load <55 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %119 = load <56 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %120 = load <57 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %121 = load <58 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %122 = load <59 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %120 = load <57 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %121 = load <58 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %122 = load <59 x i8>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %123 = load <60 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %124 = load <61 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %125 = load <62 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %126 = load <63 x i8>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %131 = load <9 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %132 = load <10 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %133 = load <11 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %134 = load <12 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %135 = load <13 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %136 = load <14 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %137 = load <15 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %138 = load <17 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %139 = load <18 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %140 = load <19 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %141 = load <20 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %142 = load <21 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %143 = load <22 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %144 = load <23 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %145 = load <24 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %146 = load <25 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %147 = load <26 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %148 = load <27 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %149 = load <28 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %150 = load <29 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %151 = load <30 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %152 = load <31 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %153 = load <33 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %154 = load <34 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %155 = load <35 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %156 = load <36 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %157 = load <37 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %158 = load <38 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %159 = load <39 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %160 = load <40 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %161 = load <41 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %162 = load <42 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %163 = load <43 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %164 = load <44 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %165 = load <45 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %166 = load <46 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %167 = load <47 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %168 = load <48 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %169 = load <49 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %170 = load <50 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %171 = load <51 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %172 = load <52 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %173 = load <53 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %174 = load <54 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %175 = load <55 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %176 = load <56 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %177 = load <57 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %178 = load <58 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %179 = load <59 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %180 = load <60 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %181 = load <61 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %182 = load <62 x i1>, ptr undef, align 1
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %183 = load <63 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %124 = load <61 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %125 = load <62 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %126 = load <63 x i8>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %127 = load <3 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %128 = load <5 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %129 = load <6 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %130 = load <7 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %131 = load <9 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %132 = load <10 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %133 = load <11 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %134 = load <12 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %135 = load <13 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %136 = load <14 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %137 = load <15 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %138 = load <17 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %139 = load <18 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %140 = load <19 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %141 = load <20 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %142 = load <21 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %143 = load <22 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %144 = load <23 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %145 = load <24 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %146 = load <25 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %147 = load <26 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %148 = load <27 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %149 = load <28 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %150 = load <29 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %151 = load <30 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %152 = load <31 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %153 = load <33 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %154 = load <34 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %155 = load <35 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %156 = load <36 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %157 = load <37 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %158 = load <38 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %159 = load <39 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %160 = load <40 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %161 = load <41 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %162 = load <42 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %163 = load <43 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %164 = load <44 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %165 = load <45 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %166 = load <46 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %167 = load <47 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %168 = load <48 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %169 = load <49 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %170 = load <50 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %171 = load <51 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %172 = load <52 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %173 = load <53 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %174 = load <54 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %175 = load <55 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %176 = load <56 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %177 = load <57 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %178 = load <58 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %179 = load <59 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %180 = load <60 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %181 = load <61 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %182 = load <62 x i1>, ptr undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %183 = load <63 x i1>, ptr undef, align 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   load <1 x i64>, ptr undef, align 1
@@ -6413,11 +6413,11 @@ define i32 @loads_align64(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <16 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <8 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = load <4 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = load <2 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = load <1 x i1>, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %26 = load <256 x i1>, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, ptr undef, align 64
@@ -7200,17 +7200,17 @@ define i32 @loads_partial_align64(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %124 = load <61 x i8>, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %125 = load <62 x i8>, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %126 = load <63 x i8>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, ptr undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %127 = load <3 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %128 = load <5 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %129 = load <6 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %130 = load <7 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %131 = load <9 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %132 = load <10 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %133 = load <11 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %134 = load <12 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %135 = load <13 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %136 = load <14 x i1>, ptr undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %137 = load <15 x i1>, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, ptr undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, ptr undef, align 64

diff  --git a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
index 80463331008da..12d0545012772 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
@@ -25,7 +25,7 @@
 
 define void @shuffle_load() {
 ; SSE-LABEL: 'shuffle_load'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer
@@ -88,7 +88,7 @@ define void @shuffle_load() {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE2-LABEL: 'shuffle_load'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer
@@ -151,7 +151,7 @@ define void @shuffle_load() {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE3-LABEL: 'shuffle_load'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer
@@ -214,7 +214,7 @@ define void @shuffle_load() {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX-LABEL: 'shuffle_load'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer
@@ -277,7 +277,7 @@ define void @shuffle_load() {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'shuffle_load'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer
@@ -340,7 +340,7 @@ define void @shuffle_load() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'shuffle_load'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer

diff  --git a/llvm/test/Analysis/CostModel/X86/vector-extract.ll b/llvm/test/Analysis/CostModel/X86/vector-extract.ll
index 036deb31326a1..b5eefbdb19afe 100644
--- a/llvm/test/Analysis/CostModel/X86/vector-extract.ll
+++ b/llvm/test/Analysis/CostModel/X86/vector-extract.ll
@@ -693,7 +693,7 @@ define i32 @extract_i16(i32 %arg) {
 
 define i32 @extract_i8(i32 %arg) {
 ; SSE2-LABEL: 'extract_i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
@@ -726,7 +726,7 @@ define i32 @extract_i8(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'extract_i8'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
@@ -759,7 +759,7 @@ define i32 @extract_i8(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'extract_i8'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
@@ -792,7 +792,7 @@ define i32 @extract_i8(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'extract_i8'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
@@ -825,7 +825,7 @@ define i32 @extract_i8(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'extract_i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
@@ -858,7 +858,7 @@ define i32 @extract_i8(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'extract_i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
@@ -891,7 +891,7 @@ define i32 @extract_i8(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'extract_i8'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
@@ -924,7 +924,7 @@ define i32 @extract_i8(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'extract_i8'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
@@ -997,16 +997,16 @@ define i32 @extract_i8(i32 %arg) {
 
 define i32 @extract_i1(i32 %arg) {
 ; SSE-LABEL: 'extract_i1'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15
@@ -1030,16 +1030,16 @@ define i32 @extract_i1(i32 %arg) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'extract_i1'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15
@@ -1063,27 +1063,27 @@ define i32 @extract_i1(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'extract_i1'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i1_a = extractelement <32 x i1> undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = extractelement <32 x i1> undef, i32 7
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = extractelement <32 x i1> undef, i32 8
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = extractelement <32 x i1> undef, i32 15
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = extractelement <32 x i1> undef, i32 24
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = extractelement <32 x i1> undef, i32 31
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i1_a = extractelement <64 x i1> undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = extractelement <64 x i1> undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = extractelement <64 x i1> undef, i32 7
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = extractelement <64 x i1> undef, i32 8
@@ -1096,16 +1096,16 @@ define i32 @extract_i1(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'extract_i1'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = extractelement <2 x i1> undef, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = extractelement <4 x i1> undef, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = extractelement <4 x i1> undef, i32 2
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = extractelement <8 x i1> undef, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = extractelement <8 x i1> undef, i32 4
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = extractelement <16 x i1> undef, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = extractelement <16 x i1> undef, i32 8
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = extractelement <16 x i1> undef, i32 15

diff  --git a/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
index e6a4de688186e..9caaae0717fda 100644
--- a/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
@@ -663,7 +663,7 @@ define i32 @insert_i16(i32 %arg) {
 
 define i32 @insert_i8(i32 %arg) {
 ; SSE2-LABEL: 'insert_i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
@@ -696,7 +696,7 @@ define i32 @insert_i8(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i8'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
@@ -729,7 +729,7 @@ define i32 @insert_i8(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i8'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
@@ -762,7 +762,7 @@ define i32 @insert_i8(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i8'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
@@ -795,7 +795,7 @@ define i32 @insert_i8(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
@@ -828,7 +828,7 @@ define i32 @insert_i8(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
@@ -861,7 +861,7 @@ define i32 @insert_i8(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i8'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
@@ -894,7 +894,7 @@ define i32 @insert_i8(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i8'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
@@ -967,16 +967,16 @@ define i32 @insert_i8(i32 %arg) {
 
 define i32 @insert_i1(i32 %arg) {
 ; SSE2-LABEL: 'insert_i1'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
@@ -1000,16 +1000,16 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i1'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
@@ -1033,16 +1033,16 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i1'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
@@ -1066,16 +1066,16 @@ define i32 @insert_i1(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i1'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
@@ -1099,16 +1099,16 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i1'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
@@ -1132,27 +1132,27 @@ define i32 @insert_i1(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'insert_i1'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
@@ -1165,16 +1165,16 @@ define i32 @insert_i1(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'insert_i1'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
@@ -1198,16 +1198,16 @@ define i32 @insert_i1(i32 %arg) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i1'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
@@ -1231,16 +1231,16 @@ define i32 @insert_i1(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i1'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15

diff  --git a/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll b/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll
index 80613b65e99d3..252497643a4f3 100644
--- a/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll
+++ b/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll
@@ -663,7 +663,7 @@ define i32 @insert_i16(i32 %arg, i16 %val, <2 x i16> %src32, <4 x i16> %src64, <
 
 define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) {
 ; SSE2-LABEL: 'insert_i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> %src16, i8 %val, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> %src16, i8 %val, i32 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> %src32, i8 %val, i32 %arg
@@ -696,7 +696,7 @@ define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i8'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> %src16, i8 %val, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> %src16, i8 %val, i32 1
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> %src32, i8 %val, i32 %arg
@@ -729,7 +729,7 @@ define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i8'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_0 = insertelement <2 x i8> %src16, i8 %val, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_3 = insertelement <2 x i8> %src16, i8 %val, i32 1
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> %src32, i8 %val, i32 %arg
@@ -762,7 +762,7 @@ define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i8'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> %src16, i8 %val, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> %src16, i8 %val, i32 1
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> %src32, i8 %val, i32 %arg
@@ -795,7 +795,7 @@ define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> %src16, i8 %val, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> %src16, i8 %val, i32 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> %src32, i8 %val, i32 %arg
@@ -828,7 +828,7 @@ define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> %src16, i8 %val, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> %src16, i8 %val, i32 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> %src32, i8 %val, i32 %arg
@@ -861,7 +861,7 @@ define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i8'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> %src16, i8 %val, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> %src16, i8 %val, i32 1
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> %src32, i8 %val, i32 %arg
@@ -894,7 +894,7 @@ define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i8'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> %src16, i8 %val, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> %src16, i8 %val, i32 1
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> %src32, i8 %val, i32 %arg
@@ -967,16 +967,16 @@ define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x
 
 define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> %src16, <32 x i1> %src32, <64 x i1> %src64) {
 ; SSE2-LABEL: 'insert_i1'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> %src2, i1 %val, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> %src2, i1 %val, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> %src4, i1 %val, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> %src4, i1 %val, i32 2
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
@@ -1000,16 +1000,16 @@ define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i1'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> %src2, i1 %val, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> %src2, i1 %val, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> %src4, i1 %val, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> %src4, i1 %val, i32 2
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
@@ -1033,16 +1033,16 @@ define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i1'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> %src2, i1 %val, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> %src2, i1 %val, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> %src4, i1 %val, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> %src4, i1 %val, i32 2
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
@@ -1066,16 +1066,16 @@ define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i1'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> %src2, i1 %val, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> %src2, i1 %val, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> %src4, i1 %val, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> %src4, i1 %val, i32 2
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
@@ -1099,16 +1099,16 @@ define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i1'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> %src2, i1 %val, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> %src2, i1 %val, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> %src4, i1 %val, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> %src4, i1 %val, i32 2
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
@@ -1132,27 +1132,27 @@ define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'insert_i1'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> %src2, i1 %val, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> %src2, i1 %val, i32 1
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> %src4, i1 %val, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> %src4, i1 %val, i32 2
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i1_a = insertelement <32 x i1> %src32, i1 %val, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32i1_a = insertelement <32 x i1> %src32, i1 %val, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> %src32, i1 %val, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> %src32, i1 %val, i32 7
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> %src32, i1 %val, i32 8
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> %src32, i1 %val, i32 15
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> %src32, i1 %val, i32 24
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> %src32, i1 %val, i32 31
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i1_a = insertelement <64 x i1> %src64, i1 %val, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64i1_a = insertelement <64 x i1> %src64, i1 %val, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> %src64, i1 %val, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> %src64, i1 %val, i32 7
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> %src64, i1 %val, i32 8
@@ -1165,16 +1165,16 @@ define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'insert_i1'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> %src2, i1 %val, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> %src2, i1 %val, i32 1
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> %src4, i1 %val, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> %src4, i1 %val, i32 2
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
@@ -1198,16 +1198,16 @@ define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i1'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> %src2, i1 %val, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> %src2, i1 %val, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> %src4, i1 %val, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> %src4, i1 %val, i32 2
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
@@ -1231,16 +1231,16 @@ define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i1'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> %src2, i1 %val, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> %src2, i1 %val, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> %src4, i1 %val, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> %src4, i1 %val, i32 2
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15

diff  --git a/llvm/test/Analysis/CostModel/X86/vector-insert.ll b/llvm/test/Analysis/CostModel/X86/vector-insert.ll
index fd40da9ff8b76..1d2e1f23b9b53 100644
--- a/llvm/test/Analysis/CostModel/X86/vector-insert.ll
+++ b/llvm/test/Analysis/CostModel/X86/vector-insert.ll
@@ -663,7 +663,7 @@ define i32 @insert_i16(i32 %arg) {
 
 define i32 @insert_i8(i32 %arg) {
 ; SSE2-LABEL: 'insert_i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
@@ -696,7 +696,7 @@ define i32 @insert_i8(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i8'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
@@ -729,7 +729,7 @@ define i32 @insert_i8(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i8'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
@@ -762,7 +762,7 @@ define i32 @insert_i8(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i8'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
@@ -795,7 +795,7 @@ define i32 @insert_i8(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
@@ -828,7 +828,7 @@ define i32 @insert_i8(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'insert_i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
@@ -861,7 +861,7 @@ define i32 @insert_i8(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i8'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
@@ -894,7 +894,7 @@ define i32 @insert_i8(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i8'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
@@ -967,16 +967,16 @@ define i32 @insert_i8(i32 %arg) {
 
 define i32 @insert_i1(i32 %arg) {
 ; SSE2-LABEL: 'insert_i1'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
@@ -1000,16 +1000,16 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i1'
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
@@ -1033,16 +1033,16 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i1'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
@@ -1066,16 +1066,16 @@ define i32 @insert_i1(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE4-LABEL: 'insert_i1'
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; SSE4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; SSE4-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
@@ -1099,16 +1099,16 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'insert_i1'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
@@ -1132,27 +1132,27 @@ define i32 @insert_i1(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'insert_i1'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
@@ -1165,16 +1165,16 @@ define i32 @insert_i1(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'insert_i1'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
@@ -1198,16 +1198,16 @@ define i32 @insert_i1(i32 %arg) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'insert_i1'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
@@ -1231,16 +1231,16 @@ define i32 @insert_i1(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'insert_i1'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll b/llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll
index edac08cc02c09..cb44d05423007 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64    -S | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v2 -S | FileCheck %s --check-prefixes=SSE,SSE4
-; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v3 -S | FileCheck %s --check-prefixes=AVX
-; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v4 -S | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64    -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v2 -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
+; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v3 -S | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v4 -S | FileCheck %s --check-prefixes=CHECK,AVX512
 
 ; // PR42652
 ; unsigned long bitmask_16xi8(const char *src) {
@@ -15,105 +15,39 @@
 ; }
 
 define i64 @bitmask_16xi8(ptr nocapture noundef readonly %src) {
-; SSE-LABEL: @bitmask_16xi8(
-; SSE-NEXT:  entry:
-; SSE-NEXT:    [[TMP0:%.*]] = load i8, ptr [[SRC:%.*]], align 1
-; SSE-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i8 [[TMP0]], 0
-; SSE-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
-; SSE-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX_1]], align 1
-; SSE-NEXT:    [[TMP2:%.*]] = icmp eq <8 x i8> [[TMP1]], zeroinitializer
-; SSE-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> zeroinitializer, <8 x i64> <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SSE-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 9
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[ARRAYIDX_9]], align 1
-; SSE-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i8> [[TMP4]], zeroinitializer
-; SSE-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> zeroinitializer, <4 x i64> <i64 512, i64 1024, i64 2048, i64 4096>
-; SSE-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 13
-; SSE-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX_13]], align 1
-; SSE-NEXT:    [[TOBOOL_NOT_13:%.*]] = icmp eq i8 [[TMP7]], 0
-; SSE-NEXT:    [[OR_13:%.*]] = select i1 [[TOBOOL_NOT_13]], i64 0, i64 8192
-; SSE-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 14
-; SSE-NEXT:    [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX_14]], align 1
-; SSE-NEXT:    [[TOBOOL_NOT_14:%.*]] = icmp eq i8 [[TMP8]], 0
-; SSE-NEXT:    [[OR_14:%.*]] = select i1 [[TOBOOL_NOT_14]], i64 0, i64 16384
-; SSE-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 15
-; SSE-NEXT:    [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX_15]], align 1
-; SSE-NEXT:    [[TOBOOL_NOT_15:%.*]] = icmp eq i8 [[TMP9]], 0
-; SSE-NEXT:    [[OR_15:%.*]] = select i1 [[TOBOOL_NOT_15]], i64 0, i64 32768
-; SSE-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP3]])
-; SSE-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP6]])
-; SSE-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP10]], [[TMP11]]
-; SSE-NEXT:    [[OP_RDX1:%.*]] = or i64 [[OP_RDX]], [[OR_13]]
-; SSE-NEXT:    [[OP_RDX2:%.*]] = or i64 [[OR_14]], [[OR_15]]
-; SSE-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX1]], [[OP_RDX2]]
-; SSE-NEXT:    [[OP_RDX4:%.*]] = or i64 [[OP_RDX3]], [[OR]]
-; SSE-NEXT:    ret i64 [[OP_RDX4]]
-;
-; AVX-LABEL: @bitmask_16xi8(
-; AVX-NEXT:  entry:
-; AVX-NEXT:    [[TMP0:%.*]] = load i8, ptr [[SRC:%.*]], align 1
-; AVX-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i8 [[TMP0]], 0
-; AVX-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
-; AVX-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX_1]], align 1
-; AVX-NEXT:    [[TMP2:%.*]] = icmp eq <8 x i8> [[TMP1]], zeroinitializer
-; AVX-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> zeroinitializer, <8 x i64> <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 9
-; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[ARRAYIDX_9]], align 1
-; AVX-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i8> [[TMP4]], zeroinitializer
-; AVX-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> zeroinitializer, <4 x i64> <i64 512, i64 1024, i64 2048, i64 4096>
-; AVX-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 13
-; AVX-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX_13]], align 1
-; AVX-NEXT:    [[TOBOOL_NOT_13:%.*]] = icmp eq i8 [[TMP7]], 0
-; AVX-NEXT:    [[OR_13:%.*]] = select i1 [[TOBOOL_NOT_13]], i64 0, i64 8192
-; AVX-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 14
-; AVX-NEXT:    [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX_14]], align 1
-; AVX-NEXT:    [[TOBOOL_NOT_14:%.*]] = icmp eq i8 [[TMP8]], 0
-; AVX-NEXT:    [[OR_14:%.*]] = select i1 [[TOBOOL_NOT_14]], i64 0, i64 16384
-; AVX-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 15
-; AVX-NEXT:    [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX_15]], align 1
-; AVX-NEXT:    [[TOBOOL_NOT_15:%.*]] = icmp eq i8 [[TMP9]], 0
-; AVX-NEXT:    [[OR_15:%.*]] = select i1 [[TOBOOL_NOT_15]], i64 0, i64 32768
-; AVX-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP3]])
-; AVX-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP6]])
-; AVX-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP10]], [[TMP11]]
-; AVX-NEXT:    [[OP_RDX1:%.*]] = or i64 [[OP_RDX]], [[OR_13]]
-; AVX-NEXT:    [[OP_RDX2:%.*]] = or i64 [[OR_14]], [[OR_15]]
-; AVX-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX1]], [[OP_RDX2]]
-; AVX-NEXT:    [[OP_RDX4:%.*]] = or i64 [[OP_RDX3]], [[OR]]
-; AVX-NEXT:    ret i64 [[OP_RDX4]]
-;
-; AVX512-LABEL: @bitmask_16xi8(
-; AVX512-NEXT:  entry:
-; AVX512-NEXT:    [[TMP0:%.*]] = load i8, ptr [[SRC:%.*]], align 1
-; AVX512-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i8 [[TMP0]], 0
-; AVX512-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
-; AVX512-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX_1]], align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = icmp eq <8 x i8> [[TMP1]], zeroinitializer
-; AVX512-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> zeroinitializer, <8 x i64> <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX512-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 9
-; AVX512-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[ARRAYIDX_9]], align 1
-; AVX512-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i8> [[TMP4]], zeroinitializer
-; AVX512-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> zeroinitializer, <4 x i64> <i64 512, i64 1024, i64 2048, i64 4096>
-; AVX512-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 13
-; AVX512-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX_13]], align 1
-; AVX512-NEXT:    [[TOBOOL_NOT_13:%.*]] = icmp eq i8 [[TMP7]], 0
-; AVX512-NEXT:    [[OR_13:%.*]] = select i1 [[TOBOOL_NOT_13]], i64 0, i64 8192
-; AVX512-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 14
-; AVX512-NEXT:    [[TMP8:%.*]] = load <2 x i8>, ptr [[ARRAYIDX_14]], align 1
-; AVX512-NEXT:    [[TMP9:%.*]] = icmp eq <2 x i8> [[TMP8]], zeroinitializer
-; AVX512-NEXT:    [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i64> zeroinitializer, <2 x i64> <i64 16384, i64 32768>
-; AVX512-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP3]])
-; AVX512-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP6]])
-; AVX512-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP11]], [[TMP12]]
-; AVX512-NEXT:    [[OP_RDX1:%.*]] = or i64 [[OP_RDX]], [[OR_13]]
-; AVX512-NEXT:    [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
-; AVX512-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
-; AVX512-NEXT:    [[OP_RDX2:%.*]] = or i64 [[TMP13]], [[TMP14]]
-; AVX512-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX1]], [[OP_RDX2]]
-; AVX512-NEXT:    [[OP_RDX4:%.*]] = or i64 [[OP_RDX3]], [[OR]]
-; AVX512-NEXT:    ret i64 [[OP_RDX4]]
+; CHECK-LABEL: @bitmask_16xi8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[SRC:%.*]], align 1
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i8 [[TMP0]], 0
+; CHECK-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX_1]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <8 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> zeroinitializer, <8 x i64> <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 9
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[ARRAYIDX_9]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i8> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> zeroinitializer, <4 x i64> <i64 512, i64 1024, i64 2048, i64 4096>
+; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 13
+; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX_13]], align 1
+; CHECK-NEXT:    [[TOBOOL_NOT_13:%.*]] = icmp eq i8 [[TMP7]], 0
+; CHECK-NEXT:    [[OR_13:%.*]] = select i1 [[TOBOOL_NOT_13]], i64 0, i64 8192
+; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 14
+; CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX_14]], align 1
+; CHECK-NEXT:    [[TOBOOL_NOT_14:%.*]] = icmp eq i8 [[TMP8]], 0
+; CHECK-NEXT:    [[OR_14:%.*]] = select i1 [[TOBOOL_NOT_14]], i64 0, i64 16384
+; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 15
+; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX_15]], align 1
+; CHECK-NEXT:    [[TOBOOL_NOT_15:%.*]] = icmp eq i8 [[TMP9]], 0
+; CHECK-NEXT:    [[OR_15:%.*]] = select i1 [[TOBOOL_NOT_15]], i64 0, i64 32768
+; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP3]])
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP6]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = or i64 [[OP_RDX]], [[OR_13]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = or i64 [[OR_14]], [[OR_15]]
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX1]], [[OP_RDX2]]
+; CHECK-NEXT:    [[OP_RDX4:%.*]] = or i64 [[OP_RDX3]], [[OR]]
+; CHECK-NEXT:    ret i64 [[OP_RDX4]]
 ;
 entry:
   %0 = load i8, ptr %src, align 1

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/pr59867.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr59867.ll
index 25d267f15e9ae..0148cd512625c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr59867.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr59867.ll
@@ -6,12 +6,15 @@
 
 define i1 @PR59867(ptr %s1, ptr %s2) {
 ; CHECK-LABEL: @PR59867(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[S1:%.*]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[S2:%.*]], align 1
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
-; CHECK-NEXT:    [[RES:%.*]] = select i1 [[TMP4]], i1 [[TMP5]], i1 false
+; CHECK-NEXT:    [[V1_1:%.*]] = load i8, ptr [[S1:%.*]], align 1
+; CHECK-NEXT:    [[V2_1:%.*]] = load i8, ptr [[S2:%.*]], align 1
+; CHECK-NEXT:    [[C1:%.*]] = icmp eq i8 [[V1_1]], [[V2_1]]
+; CHECK-NEXT:    [[S1_2:%.*]] = getelementptr inbounds i8, ptr [[S1]], i64 1
+; CHECK-NEXT:    [[V1_2:%.*]] = load i8, ptr [[S1_2]], align 1
+; CHECK-NEXT:    [[S2_2:%.*]] = getelementptr inbounds i8, ptr [[S2]], i64 1
+; CHECK-NEXT:    [[V2_2:%.*]] = load i8, ptr [[S2_2]], align 1
+; CHECK-NEXT:    [[C2:%.*]] = icmp eq i8 [[V1_2]], [[V2_2]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[C1]], i1 [[C2]], i1 false
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
   %v1.1 = load i8, ptr %s1, align 1


        


More information about the llvm-commits mailing list