[llvm] b9fc477 - [NFC][X86][CostModel] Rewrite load_store.ll

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 18 01:15:00 PDT 2021


Author: Roman Lebedev
Date: 2021-04-18T11:12:36+03:00
New Revision: b9fc47745a6ff1c97505a65463dfe474e332421d

URL: https://github.com/llvm/llvm-project/commit/b9fc47745a6ff1c97505a65463dfe474e332421d
DIFF: https://github.com/llvm/llvm-project/commit/b9fc47745a6ff1c97505a65463dfe474e332421d.diff

LOG: [NFC][X86][CostModel] Rewrite load_store.ll

Test SSE41, since that added float/i64/i32/i8 inserts/extracts.
Don't forget to test vectors of pointers.
Do test byte-aligned loads/stores.
Fixup test coverage to be rather more exhaustive,
testing all reasonable element sizes vs element counts permutations
that fit up to witin ZMM.

Added: 
    

Modified: 
    llvm/test/Analysis/CostModel/X86/load_store.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/X86/load_store.ll b/llvm/test/Analysis/CostModel/X86/load_store.ll
index 2614043d0e829..7523b6b2da668 100644
--- a/llvm/test/Analysis/CostModel/X86/load_store.ll
+++ b/llvm/test/Analysis/CostModel/X86/load_store.ll
@@ -1,382 +1,2851 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41
 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx  | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-define i32 @stores(i32 %arg) {
-; SSE-LABEL: 'stores'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX-LABEL: 'stores'
+define i32 @stores_align4(i32 %arg) {
+  ; Scalars
+; SSE2-LABEL: 'stores_align4'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, <8 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'stores_align4'
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, <8 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'stores_align4'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x float> undef, <16 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x double> undef, <8 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'stores_align4'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x float> undef, <16 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x double> undef, <8 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+  store i8 undef, i8* undef, align 4
+  store i16 undef, i16* undef, align 4
+  store i32 undef, i32* undef, align 4
+  store i64 undef, i64* undef, align 4
+  store i128 undef, i128* undef, align 4
+  store i256 undef, i256* undef, align 4
+  store i512 undef, i512* undef, align 4
+
+  store float undef, float* undef, align 4
+  store double undef, double* undef, align 4
+
+  store i8* undef, i8** undef, align 4
+
+  ; XMM (128-bit) vectors
+  store <16 x i8> undef, <16 x i8>* undef, align 4
+  store <8 x i16> undef, <8 x i16>* undef, align 4
+  store <4 x i32> undef, <4 x i32>* undef, align 4
+  store <2 x i64> undef, <2 x i64>* undef, align 4
+  store <1 x i128> undef, <1 x i128>* undef, align 4
+
+  store <4 x float> undef, <4 x float>* undef, align 4
+  store <2 x double> undef, <2 x double>* undef, align 4
+
+  store <2 x i8*> undef, <2 x i8*>* undef, align 4
+
+  ; YMM (256-bit) vectors
+  store <32 x i8> undef, <32 x i8>* undef, align 4
+  store <16 x i16> undef, <16 x i16>* undef, align 4
+  store <8 x i32> undef, <8 x i32>* undef, align 4
+  store <4 x i64> undef, <4 x i64>* undef, align 4
+  store <2 x i128> undef, <2 x i128>* undef, align 4
+  store <1 x i256> undef, <1 x i256>* undef, align 4
+
+  store <8 x float> undef, <8 x float>* undef, align 4
+  store <4 x double> undef, <4 x double>* undef, align 4
+
+  store <4 x i8*> undef, <4 x i8*>* undef, align 4
+
+  ; ZMM (512-bit) vectors
+  store <64 x i8> undef, <64 x i8>* undef, align 4
+  store <32 x i16> undef, <32 x i16>* undef, align 4
+  store <16 x i32> undef, <16 x i32>* undef, align 4
+  store <8 x i64> undef, <8 x i64>* undef, align 4
+  store <4 x i128> undef, <4 x i128>* undef, align 4
+  store <2 x i256> undef, <2 x i256>* undef, align 4
+  store <1 x i512> undef, <1 x i512>* undef, align 4
+
+  store <16 x float> undef, <16 x float>* undef, align 4
+  store <8 x double> undef, <8 x double>* undef, align 4
+
+  store <8 x i8*> undef, <8 x i8*>* undef, align 4
+
+  ret i32 undef
+}
+
+define i32 @stores_partial_align4(i32 %arg) {
+  ; Partial vectors with i64 elements (doubles as pointer-sized tests))
+; SSE2-LABEL: 'stores_partial_align4'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, <5 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, <7 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'stores_partial_align4'
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, <5 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, <7 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'stores_partial_align4'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x double> undef, <5 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x double> undef, <6 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, <5 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, <7 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <9 x float> undef, <9 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x float> undef, <10 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x float> undef, <11 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x float> undef, <12 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x float> undef, <14 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; AVX512-LABEL: 'stores'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4
+; AVX512-LABEL: 'stores_partial_align4'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x double> undef, <6 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, <5 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, <7 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x float> undef, <10 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x float> undef, <12 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x float> undef, <14 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-  store i8 undef, i8* undef, align 4
-  store i16 undef, i16* undef, align 4
-  store i32 undef, i32* undef, align 4
-  store i64 undef, i64* undef, align 4
-  store i128 undef, i128* undef, align 4
-
-  store <4 x i16> undef, <4 x i16>* undef, align 4
-  store <4 x i32> undef, <4 x i32>* undef, align 4
-  store <4 x i64> undef, <4 x i64>* undef, align 4
-
-  store <8 x i16> undef, <8 x i16>* undef, align 4
-  store <8 x i32> undef, <8 x i32>* undef, align 4
-  store <8 x i64> undef, <8 x i64>* undef, align 4
+  store <1 x i64> undef, <1 x i64>* undef, align 4
+  ; <2 x i64> is XMM
+  store <3 x i64> undef, <3 x i64>* undef, align 4
+  ; <4 x i64> is YMM
+  store <5 x i64> undef, <5 x i64>* undef, align 4
+  store <6 x i64> undef, <6 x i64>* undef, align 4
+  store <7 x i64> undef, <7 x i64>* undef, align 4
+  ; <8 x i64> is ZMM
 
-  store <3 x float> undef, <3 x float>* undef, align 4
+  ; Partial vectors with double elements
+  store <1 x double> undef, <1 x double>* undef, align 4
+  ; <2 x double> is XMM
   store <3 x double> undef, <3 x double>* undef, align 4
+  ; <4 x double> is YMM
+  store <5 x double> undef, <5 x double>* undef, align 4
+  store <6 x double> undef, <6 x double>* undef, align 4
+  store <7 x double> undef, <7 x double>* undef, align 4
+  ; <8 x double> is ZMM
 
+  ; Partial vectors with i32 elements
+  store <1 x i32> undef, <1 x i32>* undef, align 4
+  store <2 x i32> undef, <2 x i32>* undef, align 4
   store <3 x i32> undef, <3 x i32>* undef, align 4
-  store <3 x i64> undef, <3 x i64>* undef, align 4
+  ; <4 x i32> is XMM
   store <5 x i32> undef, <5 x i32>* undef, align 4
-  store <5 x i64> undef, <5 x i64>* undef, align 4
+  store <6 x i32> undef, <6 x i32>* undef, align 4
+  store <7 x i32> undef, <7 x i32>* undef, align 4
+  ; <8 x i32> is YMM
+  store <9 x i32> undef, <9 x i32>* undef, align 4
+  store <10 x i32> undef, <10 x i32>* undef, align 4
+  store <11 x i32> undef, <11 x i32>* undef, align 4
+  store <12 x i32> undef, <12 x i32>* undef, align 4
+  store <13 x i32> undef, <13 x i32>* undef, align 4
+  store <14 x i32> undef, <14 x i32>* undef, align 4
+  store <15 x i32> undef, <15 x i32>* undef, align 4
+  ; <16 x i32> is ZMM
+
+  ; Partial vectors with float elements
+  store <1 x float> undef, <1 x float>* undef, align 4
+  store <2 x float> undef, <2 x float>* undef, align 4
+  store <3 x float> undef, <3 x float>* undef, align 4
+  ; <4 x float> is XMM
+  store <5 x float> undef, <5 x float>* undef, align 4
+  store <6 x float> undef, <6 x float>* undef, align 4
+  store <7 x float> undef, <7 x float>* undef, align 4
+  ; <8 x float> is YMM
+  store <9 x float> undef, <9 x float>* undef, align 4
+  store <10 x float> undef, <10 x float>* undef, align 4
+  store <11 x float> undef, <11 x float>* undef, align 4
+  store <12 x float> undef, <12 x float>* undef, align 4
+  store <13 x float> undef, <13 x float>* undef, align 4
+  store <14 x float> undef, <14 x float>* undef, align 4
+  store <15 x float> undef, <15 x float>* undef, align 4
+  ; <16 x float> is ZMM
 
+  ; Partial vectors with i16 elements
+  store <1 x i16> undef, <1 x i16>* undef, align 4
+  store <2 x i16> undef, <2 x i16>* undef, align 4
+  store <3 x i16> undef, <3 x i16>* undef, align 4
+  store <4 x i16> undef, <4 x i16>* undef, align 4
   store <5 x i16> undef, <5 x i16>* undef, align 4
-  store <6 x i16> undef, <6 x i16>* undef, align 4 ; 1 x i64 + 1 x i32
+  store <6 x i16> undef, <6 x i16>* undef, align 4
   store <7 x i16> undef, <7 x i16>* undef, align 4
-
+  ; <8 x i16> is XMM
+  store <9 x i16> undef, <9 x i16>* undef, align 4
+  store <10 x i16> undef, <10 x i16>* undef, align 4
   store <11 x i16> undef, <11 x i16>* undef, align 4
-  store <12 x i16> undef, <12 x i16>* undef, align 4 ; 1 x i128 + 1 x i64
+  store <12 x i16> undef, <12 x i16>* undef, align 4
   store <13 x i16> undef, <13 x i16>* undef, align 4
-
+  store <14 x i16> undef, <14 x i16>* undef, align 4
+  store <15 x i16> undef, <15 x i16>* undef, align 4
+  ; <16 x i16> is YMM
+  store <17 x i16> undef, <17 x i16>* undef, align 4
+  store <18 x i16> undef, <18 x i16>* undef, align 4
+  store <19 x i16> undef, <19 x i16>* undef, align 4
+  store <20 x i16> undef, <20 x i16>* undef, align 4
+  store <21 x i16> undef, <21 x i16>* undef, align 4
+  store <22 x i16> undef, <22 x i16>* undef, align 4
   store <23 x i16> undef, <23 x i16>* undef, align 4
-  store <24 x i16> undef, <24 x i16>* undef, align 4 ; 1 x i256 + 1 x i128
+  store <24 x i16> undef, <24 x i16>* undef, align 4
   store <25 x i16> undef, <25 x i16>* undef, align 4
+  store <26 x i16> undef, <26 x i16>* undef, align 4
+  store <27 x i16> undef, <27 x i16>* undef, align 4
+  store <28 x i16> undef, <28 x i16>* undef, align 4
+  store <29 x i16> undef, <29 x i16>* undef, align 4
+  store <30 x i16> undef, <30 x i16>* undef, align 4
+  store <31 x i16> undef, <31 x i16>* undef, align 4
+  ; <32 x i16> is ZMM
+
+  ; Partial vectors with i8 elements
+  store <1 x i8> undef, <1 x i8>* undef, align 4
+  store <2 x i8> undef, <2 x i8>* undef, align 4
+  store <3 x i8> undef, <3 x i8>* undef, align 4
+  store <4 x i8> undef, <4 x i8>* undef, align 4
+  store <5 x i8> undef, <5 x i8>* undef, align 4
+  store <6 x i8> undef, <6 x i8>* undef, align 4
+  store <7 x i8> undef, <7 x i8>* undef, align 4
+  store <8 x i8> undef, <8 x i8>* undef, align 4
+  store <9 x i8> undef, <9 x i8>* undef, align 4
+  store <10 x i8> undef, <10 x i8>* undef, align 4
+  store <11 x i8> undef, <11 x i8>* undef, align 4
+  store <12 x i8> undef, <12 x i8>* undef, align 4
+  store <13 x i8> undef, <13 x i8>* undef, align 4
+  store <14 x i8> undef, <14 x i8>* undef, align 4
+  store <15 x i8> undef, <15 x i8>* undef, align 4
+  ; <16 x i8> is XMM
+  store <17 x i8> undef, <17 x i8>* undef, align 4
+  store <18 x i8> undef, <18 x i8>* undef, align 4
+  store <19 x i8> undef, <19 x i8>* undef, align 4
+  store <20 x i8> undef, <20 x i8>* undef, align 4
+  store <21 x i8> undef, <21 x i8>* undef, align 4
+  store <22 x i8> undef, <22 x i8>* undef, align 4
+  store <23 x i8> undef, <23 x i8>* undef, align 4
+  store <24 x i8> undef, <24 x i8>* undef, align 4
+  store <25 x i8> undef, <25 x i8>* undef, align 4
+  store <26 x i8> undef, <26 x i8>* undef, align 4
+  store <27 x i8> undef, <27 x i8>* undef, align 4
+  store <28 x i8> undef, <28 x i8>* undef, align 4
+  store <29 x i8> undef, <29 x i8>* undef, align 4
+  store <30 x i8> undef, <30 x i8>* undef, align 4
+  store <31 x i8> undef, <31 x i8>* undef, align 4
+  ; <32 x i8> is YMM
+  store <33 x i8> undef, <33 x i8>* undef, align 4
+  store <34 x i8> undef, <34 x i8>* undef, align 4
+  store <35 x i8> undef, <35 x i8>* undef, align 4
+  store <36 x i8> undef, <36 x i8>* undef, align 4
+  store <37 x i8> undef, <37 x i8>* undef, align 4
+  store <38 x i8> undef, <38 x i8>* undef, align 4
+  store <39 x i8> undef, <39 x i8>* undef, align 4
+  store <40 x i8> undef, <40 x i8>* undef, align 4
+  store <41 x i8> undef, <41 x i8>* undef, align 4
+  store <42 x i8> undef, <42 x i8>* undef, align 4
+  store <43 x i8> undef, <43 x i8>* undef, align 4
+  store <44 x i8> undef, <44 x i8>* undef, align 4
+  store <45 x i8> undef, <45 x i8>* undef, align 4
+  store <46 x i8> undef, <46 x i8>* undef, align 4
+  store <47 x i8> undef, <47 x i8>* undef, align 4
+  store <48 x i8> undef, <48 x i8>* undef, align 4
+  store <49 x i8> undef, <49 x i8>* undef, align 4
+  store <50 x i8> undef, <50 x i8>* undef, align 4
+  store <51 x i8> undef, <51 x i8>* undef, align 4
+  store <52 x i8> undef, <52 x i8>* undef, align 4
+  store <53 x i8> undef, <53 x i8>* undef, align 4
+  store <54 x i8> undef, <54 x i8>* undef, align 4
+  store <55 x i8> undef, <55 x i8>* undef, align 4
+  store <56 x i8> undef, <56 x i8>* undef, align 4
+  store <57 x i8> undef, <57 x i8>* undef, align 4
+  store <58 x i8> undef, <58 x i8>* undef, align 4
+  store <59 x i8> undef, <59 x i8>* undef, align 4
+  store <60 x i8> undef, <60 x i8>* undef, align 4
+  store <61 x i8> undef, <61 x i8>* undef, align 4
+  store <62 x i8> undef, <62 x i8>* undef, align 4
+  store <63 x i8> undef, <63 x i8>* undef, align 4
+  ; <64 x i8> is ZMM
+
+  ret i32 undef
+}
+
+define i32 @stores_align1(i32 %arg) {
+  ; Scalars
+; SSE2-LABEL: 'stores_align1'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, <8 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'stores_align1'
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, <8 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'stores_align1'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x float> undef, <16 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x double> undef, <8 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'stores_align1'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x float> undef, <16 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x double> undef, <8 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+  store i8 undef, i8* undef, align 1
+  store i16 undef, i16* undef, align 1
+  store i32 undef, i32* undef, align 1
+  store i64 undef, i64* undef, align 1
+  store i128 undef, i128* undef, align 1
+  store i256 undef, i256* undef, align 1
+  store i512 undef, i512* undef, align 1
+
+  store float undef, float* undef, align 1
+  store double undef, double* undef, align 1
+
+  store i8* undef, i8** undef, align 1
+
+  ; XMM (128-bit) vectors
+  store <16 x i8> undef, <16 x i8>* undef, align 1
+  store <8 x i16> undef, <8 x i16>* undef, align 1
+  store <4 x i32> undef, <4 x i32>* undef, align 1
+  store <2 x i64> undef, <2 x i64>* undef, align 1
+  store <1 x i128> undef, <1 x i128>* undef, align 1
+
+  store <4 x float> undef, <4 x float>* undef, align 1
+  store <2 x double> undef, <2 x double>* undef, align 1
+
+  store <2 x i8*> undef, <2 x i8*>* undef, align 1
+
+  ; YMM (256-bit) vectors
+  store <32 x i8> undef, <32 x i8>* undef, align 1
+  store <16 x i16> undef, <16 x i16>* undef, align 1
+  store <8 x i32> undef, <8 x i32>* undef, align 1
+  store <4 x i64> undef, <4 x i64>* undef, align 1
+  store <2 x i128> undef, <2 x i128>* undef, align 1
+  store <1 x i256> undef, <1 x i256>* undef, align 1
+
+  store <8 x float> undef, <8 x float>* undef, align 1
+  store <4 x double> undef, <4 x double>* undef, align 1
+
+  store <4 x i8*> undef, <4 x i8*>* undef, align 1
+
+  ; ZMM (512-bit) vectors
+  store <64 x i8> undef, <64 x i8>* undef, align 1
+  store <32 x i16> undef, <32 x i16>* undef, align 1
+  store <16 x i32> undef, <16 x i32>* undef, align 1
+  store <8 x i64> undef, <8 x i64>* undef, align 1
+  store <4 x i128> undef, <4 x i128>* undef, align 1
+  store <2 x i256> undef, <2 x i256>* undef, align 1
+  store <1 x i512> undef, <1 x i512>* undef, align 1
+
+  store <16 x float> undef, <16 x float>* undef, align 1
+  store <8 x double> undef, <8 x double>* undef, align 1
+
+  store <8 x i8*> undef, <8 x i8*>* undef, align 1
+
+  ret i32 undef
+}
+
+define i32 @stores_partial_align1(i32 %arg) {
+  ; Partial vectors with i64 elements (doubles as pointer-sized tests))
+; SSE2-LABEL: 'stores_partial_align1'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, <5 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, <7 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'stores_partial_align1'
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, <5 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, <7 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'stores_partial_align1'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x double> undef, <5 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x double> undef, <6 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, <5 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, <7 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <9 x float> undef, <9 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x float> undef, <10 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x float> undef, <11 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x float> undef, <12 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x float> undef, <14 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'stores_partial_align1'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x double> undef, <6 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, <5 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, <7 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x float> undef, <10 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x float> undef, <12 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x float> undef, <14 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+  store <1 x i64> undef, <1 x i64>* undef, align 1
+  ; <2 x i64> is XMM
+  store <3 x i64> undef, <3 x i64>* undef, align 1
+  ; <4 x i64> is YMM
+  store <5 x i64> undef, <5 x i64>* undef, align 1
+  store <6 x i64> undef, <6 x i64>* undef, align 1
+  store <7 x i64> undef, <7 x i64>* undef, align 1
+  ; <8 x i64> is ZMM
+
+  ; Partial vectors with double elements
+  store <1 x double> undef, <1 x double>* undef, align 1
+  ; <2 x double> is XMM
+  store <3 x double> undef, <3 x double>* undef, align 1
+  ; <4 x double> is YMM
+  store <5 x double> undef, <5 x double>* undef, align 1
+  store <6 x double> undef, <6 x double>* undef, align 1
+  store <7 x double> undef, <7 x double>* undef, align 1
+  ; <8 x double> is ZMM
+
+  ; Partial vectors with i32 elements
+  store <1 x i32> undef, <1 x i32>* undef, align 1
+  store <2 x i32> undef, <2 x i32>* undef, align 1
+  store <3 x i32> undef, <3 x i32>* undef, align 1
+  ; <4 x i32> is XMM
+  store <5 x i32> undef, <5 x i32>* undef, align 1
+  store <6 x i32> undef, <6 x i32>* undef, align 1
+  store <7 x i32> undef, <7 x i32>* undef, align 1
+  ; <8 x i32> is YMM
+  store <9 x i32> undef, <9 x i32>* undef, align 1
+  store <10 x i32> undef, <10 x i32>* undef, align 1
+  store <11 x i32> undef, <11 x i32>* undef, align 1
+  store <12 x i32> undef, <12 x i32>* undef, align 1
+  store <13 x i32> undef, <13 x i32>* undef, align 1
+  store <14 x i32> undef, <14 x i32>* undef, align 1
+  store <15 x i32> undef, <15 x i32>* undef, align 1
+  ; <16 x i32> is ZMM
+
+  ; Partial vectors with float elements
+  store <1 x float> undef, <1 x float>* undef, align 1
+  store <2 x float> undef, <2 x float>* undef, align 1
+  store <3 x float> undef, <3 x float>* undef, align 1
+  ; <4 x float> is XMM
+  store <5 x float> undef, <5 x float>* undef, align 1
+  store <6 x float> undef, <6 x float>* undef, align 1
+  store <7 x float> undef, <7 x float>* undef, align 1
+  ; <8 x float> is YMM
+  store <9 x float> undef, <9 x float>* undef, align 1
+  store <10 x float> undef, <10 x float>* undef, align 1
+  store <11 x float> undef, <11 x float>* undef, align 1
+  store <12 x float> undef, <12 x float>* undef, align 1
+  store <13 x float> undef, <13 x float>* undef, align 1
+  store <14 x float> undef, <14 x float>* undef, align 1
+  store <15 x float> undef, <15 x float>* undef, align 1
+  ; <16 x float> is ZMM
+
+  ; Partial vectors with i16 elements
+  store <1 x i16> undef, <1 x i16>* undef, align 1
+  store <2 x i16> undef, <2 x i16>* undef, align 1
+  store <3 x i16> undef, <3 x i16>* undef, align 1
+  store <4 x i16> undef, <4 x i16>* undef, align 1
+  store <5 x i16> undef, <5 x i16>* undef, align 1
+  store <6 x i16> undef, <6 x i16>* undef, align 1
+  store <7 x i16> undef, <7 x i16>* undef, align 1
+  ; <8 x i16> is XMM
+  store <9 x i16> undef, <9 x i16>* undef, align 1
+  store <10 x i16> undef, <10 x i16>* undef, align 1
+  store <11 x i16> undef, <11 x i16>* undef, align 1
+  store <12 x i16> undef, <12 x i16>* undef, align 1
+  store <13 x i16> undef, <13 x i16>* undef, align 1
+  store <14 x i16> undef, <14 x i16>* undef, align 1
+  store <15 x i16> undef, <15 x i16>* undef, align 1
+  ; <16 x i16> is YMM
+  store <17 x i16> undef, <17 x i16>* undef, align 1
+  store <18 x i16> undef, <18 x i16>* undef, align 1
+  store <19 x i16> undef, <19 x i16>* undef, align 1
+  store <20 x i16> undef, <20 x i16>* undef, align 1
+  store <21 x i16> undef, <21 x i16>* undef, align 1
+  store <22 x i16> undef, <22 x i16>* undef, align 1
+  store <23 x i16> undef, <23 x i16>* undef, align 1
+  store <24 x i16> undef, <24 x i16>* undef, align 1
+  store <25 x i16> undef, <25 x i16>* undef, align 1
+  store <26 x i16> undef, <26 x i16>* undef, align 1
+  store <27 x i16> undef, <27 x i16>* undef, align 1
+  store <28 x i16> undef, <28 x i16>* undef, align 1
+  store <29 x i16> undef, <29 x i16>* undef, align 1
+  store <30 x i16> undef, <30 x i16>* undef, align 1
+  store <31 x i16> undef, <31 x i16>* undef, align 1
+  ; <32 x i16> is ZMM
 
-  store <47 x i16> undef, <47 x i16>* undef, align 4
-  store <48 x i16> undef, <48 x i16>* undef, align 4 ; 3 x i256
-  store <49 x i16> undef, <49 x i16>* undef, align 4
+  ; Partial vectors with i8 elements
+  store <1 x i8> undef, <1 x i8>* undef, align 1
+  store <2 x i8> undef, <2 x i8>* undef, align 1
+  store <3 x i8> undef, <3 x i8>* undef, align 1
+  store <4 x i8> undef, <4 x i8>* undef, align 1
+  store <5 x i8> undef, <5 x i8>* undef, align 1
+  store <6 x i8> undef, <6 x i8>* undef, align 1
+  store <7 x i8> undef, <7 x i8>* undef, align 1
+  store <8 x i8> undef, <8 x i8>* undef, align 1
+  store <9 x i8> undef, <9 x i8>* undef, align 1
+  store <10 x i8> undef, <10 x i8>* undef, align 1
+  store <11 x i8> undef, <11 x i8>* undef, align 1
+  store <12 x i8> undef, <12 x i8>* undef, align 1
+  store <13 x i8> undef, <13 x i8>* undef, align 1
+  store <14 x i8> undef, <14 x i8>* undef, align 1
+  store <15 x i8> undef, <15 x i8>* undef, align 1
+  ; <16 x i8> is XMM
+  store <17 x i8> undef, <17 x i8>* undef, align 1
+  store <18 x i8> undef, <18 x i8>* undef, align 1
+  store <19 x i8> undef, <19 x i8>* undef, align 1
+  store <20 x i8> undef, <20 x i8>* undef, align 1
+  store <21 x i8> undef, <21 x i8>* undef, align 1
+  store <22 x i8> undef, <22 x i8>* undef, align 1
+  store <23 x i8> undef, <23 x i8>* undef, align 1
+  store <24 x i8> undef, <24 x i8>* undef, align 1
+  store <25 x i8> undef, <25 x i8>* undef, align 1
+  store <26 x i8> undef, <26 x i8>* undef, align 1
+  store <27 x i8> undef, <27 x i8>* undef, align 1
+  store <28 x i8> undef, <28 x i8>* undef, align 1
+  store <29 x i8> undef, <29 x i8>* undef, align 1
+  store <30 x i8> undef, <30 x i8>* undef, align 1
+  store <31 x i8> undef, <31 x i8>* undef, align 1
+  ; <32 x i8> is YMM
+  store <33 x i8> undef, <33 x i8>* undef, align 1
+  store <34 x i8> undef, <34 x i8>* undef, align 1
+  store <35 x i8> undef, <35 x i8>* undef, align 1
+  store <36 x i8> undef, <36 x i8>* undef, align 1
+  store <37 x i8> undef, <37 x i8>* undef, align 1
+  store <38 x i8> undef, <38 x i8>* undef, align 1
+  store <39 x i8> undef, <39 x i8>* undef, align 1
+  store <40 x i8> undef, <40 x i8>* undef, align 1
+  store <41 x i8> undef, <41 x i8>* undef, align 1
+  store <42 x i8> undef, <42 x i8>* undef, align 1
+  store <43 x i8> undef, <43 x i8>* undef, align 1
+  store <44 x i8> undef, <44 x i8>* undef, align 1
+  store <45 x i8> undef, <45 x i8>* undef, align 1
+  store <46 x i8> undef, <46 x i8>* undef, align 1
+  store <47 x i8> undef, <47 x i8>* undef, align 1
+  store <48 x i8> undef, <48 x i8>* undef, align 1
+  store <49 x i8> undef, <49 x i8>* undef, align 1
+  store <50 x i8> undef, <50 x i8>* undef, align 1
+  store <51 x i8> undef, <51 x i8>* undef, align 1
+  store <52 x i8> undef, <52 x i8>* undef, align 1
+  store <53 x i8> undef, <53 x i8>* undef, align 1
+  store <54 x i8> undef, <54 x i8>* undef, align 1
+  store <55 x i8> undef, <55 x i8>* undef, align 1
+  store <56 x i8> undef, <56 x i8>* undef, align 1
+  store <57 x i8> undef, <57 x i8>* undef, align 1
+  store <58 x i8> undef, <58 x i8>* undef, align 1
+  store <59 x i8> undef, <59 x i8>* undef, align 1
+  store <60 x i8> undef, <60 x i8>* undef, align 1
+  store <61 x i8> undef, <61 x i8>* undef, align 1
+  store <62 x i8> undef, <62 x i8>* undef, align 1
+  store <63 x i8> undef, <63 x i8>* undef, align 1
+  ; <64 x i8> is ZMM
 
   ret i32 undef
 }
 
-define i32 @stores_align(i32 %arg) {
-; SSE-LABEL: 'stores_align'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX-LABEL: 'stores_align'
+define i32 @stores_align64(i32 %arg) {
+  ; Scalars
+; SSE2-LABEL: 'stores_align64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, <8 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'stores_align64'
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, <8 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'stores_align64'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x float> undef, <16 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x double> undef, <8 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'stores_align64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x float> undef, <16 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x double> undef, <8 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+  store i8 undef, i8* undef, align 64
+  store i16 undef, i16* undef, align 64
+  store i32 undef, i32* undef, align 64
+  store i64 undef, i64* undef, align 64
+  store i128 undef, i128* undef, align 64
+  store i256 undef, i256* undef, align 64
+  store i512 undef, i512* undef, align 64
+
+  store float undef, float* undef, align 64
+  store double undef, double* undef, align 64
+
+  store i8* undef, i8** undef, align 64
+
+  ; XMM (128-bit) vectors
+  store <16 x i8> undef, <16 x i8>* undef, align 64
+  store <8 x i16> undef, <8 x i16>* undef, align 64
+  store <4 x i32> undef, <4 x i32>* undef, align 64
+  store <2 x i64> undef, <2 x i64>* undef, align 64
+  store <1 x i128> undef, <1 x i128>* undef, align 64
+
+  store <4 x float> undef, <4 x float>* undef, align 64
+  store <2 x double> undef, <2 x double>* undef, align 64
+
+  store <2 x i8*> undef, <2 x i8*>* undef, align 64
+
+  ; YMM (256-bit) vectors
+  store <32 x i8> undef, <32 x i8>* undef, align 64
+  store <16 x i16> undef, <16 x i16>* undef, align 64
+  store <8 x i32> undef, <8 x i32>* undef, align 64
+  store <4 x i64> undef, <4 x i64>* undef, align 64
+  store <2 x i128> undef, <2 x i128>* undef, align 64
+  store <1 x i256> undef, <1 x i256>* undef, align 64
+
+  store <8 x float> undef, <8 x float>* undef, align 64
+  store <4 x double> undef, <4 x double>* undef, align 64
+
+  store <4 x i8*> undef, <4 x i8*>* undef, align 64
+
+  ; ZMM (512-bit) vectors
+  store <64 x i8> undef, <64 x i8>* undef, align 64
+  store <32 x i16> undef, <32 x i16>* undef, align 64
+  store <16 x i32> undef, <16 x i32>* undef, align 64
+  store <8 x i64> undef, <8 x i64>* undef, align 64
+  store <4 x i128> undef, <4 x i128>* undef, align 64
+  store <2 x i256> undef, <2 x i256>* undef, align 64
+  store <1 x i512> undef, <1 x i512>* undef, align 64
+
+  store <16 x float> undef, <16 x float>* undef, align 64
+  store <8 x double> undef, <8 x double>* undef, align 64
+
+  store <8 x i8*> undef, <8 x i8*>* undef, align 64
+
+  ret i32 undef
+}
+
+define i32 @stores_partial_align64(i32 %arg) {
+  ; Partial vectors with i64 elements (doubles as pointer-sized tests))
+; SSE2-LABEL: 'stores_partial_align64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, <5 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, <7 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'stores_partial_align64'
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, <5 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, <7 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'stores_partial_align64'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <5 x double> undef, <5 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x double> undef, <6 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, <5 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, <7 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <9 x float> undef, <9 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x float> undef, <10 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x float> undef, <11 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x float> undef, <12 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x float> undef, <14 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; AVX512-LABEL: 'stores_align'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64
+; AVX512-LABEL: 'stores_partial_align64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x double> undef, <6 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, <5 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, <7 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x float> undef, <10 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x float> undef, <12 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x float> undef, <14 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-  store i8 undef, i8* undef, align 64
-  store i16 undef, i16* undef, align 64
-  store i32 undef, i32* undef, align 64
-  store i64 undef, i64* undef, align 64
-  store i128 undef, i128* undef, align 64
-
-  store <4 x i16> undef, <4 x i16>* undef, align 64
-  store <4 x i32> undef, <4 x i32>* undef, align 64
-  store <4 x i64> undef, <4 x i64>* undef, align 64
-
-  store <8 x i16> undef, <8 x i16>* undef, align 64
-  store <8 x i32> undef, <8 x i32>* undef, align 64
-  store <8 x i64> undef, <8 x i64>* undef, align 64
+  store <1 x i64> undef, <1 x i64>* undef, align 64
+  ; <2 x i64> is XMM
+  store <3 x i64> undef, <3 x i64>* undef, align 64
+  ; <4 x i64> is YMM
+  store <5 x i64> undef, <5 x i64>* undef, align 64
+  store <6 x i64> undef, <6 x i64>* undef, align 64
+  store <7 x i64> undef, <7 x i64>* undef, align 64
+  ; <8 x i64> is ZMM
 
-  store <3 x float> undef, <3 x float>* undef, align 64
+  ; Partial vectors with double elements
+  store <1 x double> undef, <1 x double>* undef, align 64
+  ; <2 x double> is XMM
   store <3 x double> undef, <3 x double>* undef, align 64
+  ; <4 x double> is YMM
+  store <5 x double> undef, <5 x double>* undef, align 64
+  store <6 x double> undef, <6 x double>* undef, align 64
+  store <7 x double> undef, <7 x double>* undef, align 64
+  ; <8 x double> is ZMM
 
+  ; Partial vectors with i32 elements
+  store <1 x i32> undef, <1 x i32>* undef, align 64
+  store <2 x i32> undef, <2 x i32>* undef, align 64
   store <3 x i32> undef, <3 x i32>* undef, align 64
-  store <3 x i64> undef, <3 x i64>* undef, align 64
+  ; <4 x i32> is XMM
   store <5 x i32> undef, <5 x i32>* undef, align 64
-  store <5 x i64> undef, <5 x i64>* undef, align 64
+  store <6 x i32> undef, <6 x i32>* undef, align 64
+  store <7 x i32> undef, <7 x i32>* undef, align 64
+  ; <8 x i32> is YMM
+  store <9 x i32> undef, <9 x i32>* undef, align 64
+  store <10 x i32> undef, <10 x i32>* undef, align 64
+  store <11 x i32> undef, <11 x i32>* undef, align 64
+  store <12 x i32> undef, <12 x i32>* undef, align 64
+  store <13 x i32> undef, <13 x i32>* undef, align 64
+  store <14 x i32> undef, <14 x i32>* undef, align 64
+  store <15 x i32> undef, <15 x i32>* undef, align 64
+  ; <16 x i32> is ZMM
 
+  ; Partial vectors with float elements
+  store <1 x float> undef, <1 x float>* undef, align 64
+  store <2 x float> undef, <2 x float>* undef, align 64
+  store <3 x float> undef, <3 x float>* undef, align 64
+  ; <4 x float> is XMM
+  store <5 x float> undef, <5 x float>* undef, align 64
+  store <6 x float> undef, <6 x float>* undef, align 64
+  store <7 x float> undef, <7 x float>* undef, align 64
+  ; <8 x float> is YMM
+  store <9 x float> undef, <9 x float>* undef, align 64
+  store <10 x float> undef, <10 x float>* undef, align 64
+  store <11 x float> undef, <11 x float>* undef, align 64
+  store <12 x float> undef, <12 x float>* undef, align 64
+  store <13 x float> undef, <13 x float>* undef, align 64
+  store <14 x float> undef, <14 x float>* undef, align 64
+  store <15 x float> undef, <15 x float>* undef, align 64
+  ; <16 x float> is ZMM
+
+  ; Partial vectors with i16 elements
+  store <1 x i16> undef, <1 x i16>* undef, align 64
+  store <2 x i16> undef, <2 x i16>* undef, align 64
+  store <3 x i16> undef, <3 x i16>* undef, align 64
+  store <4 x i16> undef, <4 x i16>* undef, align 64
   store <5 x i16> undef, <5 x i16>* undef, align 64
-  store <6 x i16> undef, <6 x i16>* undef, align 64 ; 1 x i64 + 1 x i32
+  store <6 x i16> undef, <6 x i16>* undef, align 64
   store <7 x i16> undef, <7 x i16>* undef, align 64
-
+  ; <8 x i16> is XMM
+  store <9 x i16> undef, <9 x i16>* undef, align 64
+  store <10 x i16> undef, <10 x i16>* undef, align 64
   store <11 x i16> undef, <11 x i16>* undef, align 64
-  store <12 x i16> undef, <12 x i16>* undef, align 64 ; 1 x i128 + 1 x i64
+  store <12 x i16> undef, <12 x i16>* undef, align 64
   store <13 x i16> undef, <13 x i16>* undef, align 64
-
+  store <14 x i16> undef, <14 x i16>* undef, align 64
+  store <15 x i16> undef, <15 x i16>* undef, align 64
+  ; <16 x i16> is YMM
+  store <17 x i16> undef, <17 x i16>* undef, align 64
+  store <18 x i16> undef, <18 x i16>* undef, align 64
+  store <19 x i16> undef, <19 x i16>* undef, align 64
+  store <20 x i16> undef, <20 x i16>* undef, align 64
+  store <21 x i16> undef, <21 x i16>* undef, align 64
+  store <22 x i16> undef, <22 x i16>* undef, align 64
   store <23 x i16> undef, <23 x i16>* undef, align 64
-  store <24 x i16> undef, <24 x i16>* undef, align 64 ; 1 x i256 + 1 x i128
+  store <24 x i16> undef, <24 x i16>* undef, align 64
   store <25 x i16> undef, <25 x i16>* undef, align 64
+  store <26 x i16> undef, <26 x i16>* undef, align 64
+  store <27 x i16> undef, <27 x i16>* undef, align 64
+  store <28 x i16> undef, <28 x i16>* undef, align 64
+  store <29 x i16> undef, <29 x i16>* undef, align 64
+  store <30 x i16> undef, <30 x i16>* undef, align 64
+  store <31 x i16> undef, <31 x i16>* undef, align 64
+  ; <32 x i16> is ZMM
 
-  store <47 x i16> undef, <47 x i16>* undef, align 64
-  store <48 x i16> undef, <48 x i16>* undef, align 64 ; 3 x i256
-  store <49 x i16> undef, <49 x i16>* undef, align 64
+  ; Partial vectors with i8 elements
+  store <1 x i8> undef, <1 x i8>* undef, align 64
+  store <2 x i8> undef, <2 x i8>* undef, align 64
+  store <3 x i8> undef, <3 x i8>* undef, align 64
+  store <4 x i8> undef, <4 x i8>* undef, align 64
+  store <5 x i8> undef, <5 x i8>* undef, align 64
+  store <6 x i8> undef, <6 x i8>* undef, align 64
+  store <7 x i8> undef, <7 x i8>* undef, align 64
+  store <8 x i8> undef, <8 x i8>* undef, align 64
+  store <9 x i8> undef, <9 x i8>* undef, align 64
+  store <10 x i8> undef, <10 x i8>* undef, align 64
+  store <11 x i8> undef, <11 x i8>* undef, align 64
+  store <12 x i8> undef, <12 x i8>* undef, align 64
+  store <13 x i8> undef, <13 x i8>* undef, align 64
+  store <14 x i8> undef, <14 x i8>* undef, align 64
+  store <15 x i8> undef, <15 x i8>* undef, align 64
+  ; <16 x i8> is XMM
+  store <17 x i8> undef, <17 x i8>* undef, align 64
+  store <18 x i8> undef, <18 x i8>* undef, align 64
+  store <19 x i8> undef, <19 x i8>* undef, align 64
+  store <20 x i8> undef, <20 x i8>* undef, align 64
+  store <21 x i8> undef, <21 x i8>* undef, align 64
+  store <22 x i8> undef, <22 x i8>* undef, align 64
+  store <23 x i8> undef, <23 x i8>* undef, align 64
+  store <24 x i8> undef, <24 x i8>* undef, align 64
+  store <25 x i8> undef, <25 x i8>* undef, align 64
+  store <26 x i8> undef, <26 x i8>* undef, align 64
+  store <27 x i8> undef, <27 x i8>* undef, align 64
+  store <28 x i8> undef, <28 x i8>* undef, align 64
+  store <29 x i8> undef, <29 x i8>* undef, align 64
+  store <30 x i8> undef, <30 x i8>* undef, align 64
+  store <31 x i8> undef, <31 x i8>* undef, align 64
+  ; <32 x i8> is YMM
+  store <33 x i8> undef, <33 x i8>* undef, align 64
+  store <34 x i8> undef, <34 x i8>* undef, align 64
+  store <35 x i8> undef, <35 x i8>* undef, align 64
+  store <36 x i8> undef, <36 x i8>* undef, align 64
+  store <37 x i8> undef, <37 x i8>* undef, align 64
+  store <38 x i8> undef, <38 x i8>* undef, align 64
+  store <39 x i8> undef, <39 x i8>* undef, align 64
+  store <40 x i8> undef, <40 x i8>* undef, align 64
+  store <41 x i8> undef, <41 x i8>* undef, align 64
+  store <42 x i8> undef, <42 x i8>* undef, align 64
+  store <43 x i8> undef, <43 x i8>* undef, align 64
+  store <44 x i8> undef, <44 x i8>* undef, align 64
+  store <45 x i8> undef, <45 x i8>* undef, align 64
+  store <46 x i8> undef, <46 x i8>* undef, align 64
+  store <47 x i8> undef, <47 x i8>* undef, align 64
+  store <48 x i8> undef, <48 x i8>* undef, align 64
+  store <49 x i8> undef, <49 x i8>* undef, align 64
+  store <50 x i8> undef, <50 x i8>* undef, align 64
+  store <51 x i8> undef, <51 x i8>* undef, align 64
+  store <52 x i8> undef, <52 x i8>* undef, align 64
+  store <53 x i8> undef, <53 x i8>* undef, align 64
+  store <54 x i8> undef, <54 x i8>* undef, align 64
+  store <55 x i8> undef, <55 x i8>* undef, align 64
+  store <56 x i8> undef, <56 x i8>* undef, align 64
+  store <57 x i8> undef, <57 x i8>* undef, align 64
+  store <58 x i8> undef, <58 x i8>* undef, align 64
+  store <59 x i8> undef, <59 x i8>* undef, align 64
+  store <60 x i8> undef, <60 x i8>* undef, align 64
+  store <61 x i8> undef, <61 x i8>* undef, align 64
+  store <62 x i8> undef, <62 x i8>* undef, align 64
+  store <63 x i8> undef, <63 x i8>* undef, align 64
+  ; <64 x i8> is ZMM
 
   ret i32 undef
 }
 
-define i32 @loads(i32 %arg) {
-; SSE-LABEL: 'loads'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <2 x i32>, <2 x i32>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = load <4 x i32>, <4 x i32>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = load <8 x i32>, <8 x i32>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4
-; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX-LABEL: 'loads'
+define i32 @loads_align4(i32 %arg) {
+  ; Scalars
+; SSE2-LABEL: 'loads_align4'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <16 x i8>, <16 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <8 x i16>, <8 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <4 x i32>, <4 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i64>, <2 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <1 x i128>, <1 x i128>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x float>, <4 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x double>, <2 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x i8*>, <2 x i8*>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = load <32 x i8>, <32 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = load <16 x i16>, <16 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <8 x i32>, <8 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <4 x i64>, <4 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x i128>, <2 x i128>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = load <1 x i256>, <1 x i256>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = load <8 x float>, <8 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %26 = load <4 x double>, <4 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = load <4 x i8*>, <4 x i8*>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %28 = load <64 x i8>, <64 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = load <32 x i16>, <32 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = load <16 x i32>, <16 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = load <8 x i64>, <8 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = load <4 x i128>, <4 x i128>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %33 = load <2 x i256>, <2 x i256>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = load <1 x i512>, <1 x i512>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %35 = load <16 x float>, <16 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %36 = load <8 x double>, <8 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %37 = load <8 x i8*>, <8 x i8*>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'loads_align4'
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <16 x i8>, <16 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <8 x i16>, <8 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <4 x i32>, <4 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i64>, <2 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <1 x i128>, <1 x i128>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x float>, <4 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x double>, <2 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x i8*>, <2 x i8*>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = load <32 x i8>, <32 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = load <16 x i16>, <16 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <8 x i32>, <8 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <4 x i64>, <4 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x i128>, <2 x i128>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = load <1 x i256>, <1 x i256>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = load <8 x float>, <8 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %26 = load <4 x double>, <4 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = load <4 x i8*>, <4 x i8*>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %28 = load <64 x i8>, <64 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = load <32 x i16>, <32 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = load <16 x i32>, <16 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = load <8 x i64>, <8 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = load <4 x i128>, <4 x i128>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %33 = load <2 x i256>, <2 x i256>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = load <1 x i512>, <1 x i512>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %35 = load <16 x float>, <16 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %36 = load <8 x double>, <8 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %37 = load <8 x i8*>, <8 x i8*>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'loads_align4'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <2 x i32>, <2 x i32>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = load <4 x i32>, <4 x i32>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load <8 x i32>, <8 x i32>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <16 x i8>, <16 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <8 x i16>, <8 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <4 x i32>, <4 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i64>, <2 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <1 x i128>, <1 x i128>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x float>, <4 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x double>, <2 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x i8*>, <2 x i8*>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <32 x i8>, <32 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <16 x i16>, <16 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i32>, <8 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x i64>, <4 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x i128>, <2 x i128>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = load <1 x i256>, <1 x i256>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <8 x float>, <8 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = load <4 x double>, <4 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = load <4 x i8*>, <4 x i8*>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <64 x i8>, <64 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %29 = load <32 x i16>, <32 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %30 = load <16 x i32>, <16 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = load <8 x i64>, <8 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = load <4 x i128>, <4 x i128>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %33 = load <2 x i256>, <2 x i256>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = load <1 x i512>, <1 x i512>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %35 = load <16 x float>, <16 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %36 = load <8 x double>, <8 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %37 = load <8 x i8*>, <8 x i8*>* undef, align 4
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; AVX512-LABEL: 'loads'
+; AVX512-LABEL: 'loads_align4'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <2 x i32>, <2 x i32>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = load <4 x i32>, <4 x i32>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load <8 x i32>, <8 x i32>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <16 x i8>, <16 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <8 x i16>, <8 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <4 x i32>, <4 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i64>, <2 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <1 x i128>, <1 x i128>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x float>, <4 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x double>, <2 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x i8*>, <2 x i8*>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <32 x i8>, <32 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <16 x i16>, <16 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i32>, <8 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x i64>, <4 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x i128>, <2 x i128>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = load <1 x i256>, <1 x i256>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <8 x float>, <8 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = load <4 x double>, <4 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = load <4 x i8*>, <4 x i8*>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = load <64 x i8>, <64 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %29 = load <32 x i16>, <32 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %30 = load <16 x i32>, <16 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %31 = load <8 x i64>, <8 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = load <4 x i128>, <4 x i128>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %33 = load <2 x i256>, <2 x i256>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = load <1 x i512>, <1 x i512>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %35 = load <16 x float>, <16 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %36 = load <8 x double>, <8 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <8 x i8*>, <8 x i8*>* undef, align 4
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   load i8, i8* undef, align 4
@@ -384,137 +2853,1785 @@ define i32 @loads(i32 %arg) {
   load i32, i32* undef, align 4
   load i64, i64* undef, align 4
   load i128, i128* undef, align 4
+  load i256, i256* undef, align 4
+  load i512, i512* undef, align 4
 
-  load <2 x i32>, <2 x i32>* undef, align 4
-  load <4 x i32>, <4 x i32>* undef, align 4
-  load <8 x i32>, <8 x i32>* undef, align 4
+  load float, float* undef, align 4
+  load double, double* undef, align 4
+
+  load i8*, i8** undef, align 4
 
+  ; XMM (128-bit) vectors
+  load <16 x i8>, <16 x i8>* undef, align 4
+  load <8 x i16>, <8 x i16>* undef, align 4
+  load <4 x i32>, <4 x i32>* undef, align 4
   load <2 x i64>, <2 x i64>* undef, align 4
+  load <1 x i128>, <1 x i128>* undef, align 4
+
+  load <4 x float>, <4 x float>* undef, align 4
+  load <2 x double>, <2 x double>* undef, align 4
+
+  load <2 x i8*>, <2 x i8*>* undef, align 4
+
+  ; YMM (256-bit) vectors
+  load <32 x i8>, <32 x i8>* undef, align 4
+  load <16 x i16>, <16 x i16>* undef, align 4
+  load <8 x i32>, <8 x i32>* undef, align 4
   load <4 x i64>, <4 x i64>* undef, align 4
+  load <2 x i128>, <2 x i128>* undef, align 4
+  load <1 x i256>, <1 x i256>* undef, align 4
+
+  load <8 x float>, <8 x float>* undef, align 4
+  load <4 x double>, <4 x double>* undef, align 4
+
+  load <4 x i8*>, <4 x i8*>* undef, align 4
+
+  ; ZMM (512-bit) vectors
+  load <64 x i8>, <64 x i8>* undef, align 4
+  load <32 x i16>, <32 x i16>* undef, align 4
+  load <16 x i32>, <16 x i32>* undef, align 4
   load <8 x i64>, <8 x i64>* undef, align 4
+  load <4 x i128>, <4 x i128>* undef, align 4
+  load <2 x i256>, <2 x i256>* undef, align 4
+  load <1 x i512>, <1 x i512>* undef, align 4
 
-  load <3 x float>, <3 x float>* undef, align 4
+  load <16 x float>, <16 x float>* undef, align 4
+  load <8 x double>, <8 x double>* undef, align 4
+
+  load <8 x i8*>, <8 x i8*>* undef, align 4
+
+  ret i32 undef
+}
+
+define i32 @loads_partial_align4(i32 %arg) {
+  ; Partial vectors with i64 elements (doubles as pointer-sized tests))
+; SSE2-LABEL: 'loads_partial_align4'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'loads_partial_align4'
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 4
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'loads_partial_align4'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 4
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'loads_partial_align4'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 4
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+  load <1 x i64>, <1 x i64>* undef, align 4
+  ; <2 x i64> is XMM
+  load <3 x i64>, <3 x i64>* undef, align 4
+  ; <4 x i64> is YMM
+  load <5 x i64>, <5 x i64>* undef, align 4
+  load <6 x i64>, <6 x i64>* undef, align 4
+  load <7 x i64>, <7 x i64>* undef, align 4
+  ; <8 x i64> is ZMM
+
+  ; Partial vectors with double elements
+  load <1 x double>, <1 x double>* undef, align 4
+  ; <2 x double> is XMM
   load <3 x double>, <3 x double>* undef, align 4
+  ; <4 x double> is YMM
+  load <5 x double>, <5 x double>* undef, align 4
+  load <6 x double>, <6 x double>* undef, align 4
+  load <7 x double>, <7 x double>* undef, align 4
+  ; <8 x double> is ZMM
 
+  ; Partial vectors with i32 elements
+  load <1 x i32>, <1 x i32>* undef, align 4
+  load <2 x i32>, <2 x i32>* undef, align 4
   load <3 x i32>, <3 x i32>* undef, align 4
-  load <3 x i64>, <3 x i64>* undef, align 4
+  ; <4 x i32> is XMM
   load <5 x i32>, <5 x i32>* undef, align 4
-  load <5 x i64>, <5 x i64>* undef, align 4
+  load <6 x i32>, <6 x i32>* undef, align 4
+  load <7 x i32>, <7 x i32>* undef, align 4
+  ; <8 x i32> is YMM
+  load <9 x i32>, <9 x i32>* undef, align 4
+  load <10 x i32>, <10 x i32>* undef, align 4
+  load <11 x i32>, <11 x i32>* undef, align 4
+  load <12 x i32>, <12 x i32>* undef, align 4
+  load <13 x i32>, <13 x i32>* undef, align 4
+  load <14 x i32>, <14 x i32>* undef, align 4
+  load <15 x i32>, <15 x i32>* undef, align 4
+  ; <16 x i32> is ZMM
 
+  ; Partial vectors with float elements
+  load <1 x float>, <1 x float>* undef, align 4
+  load <2 x float>, <2 x float>* undef, align 4
+  load <3 x float>, <3 x float>* undef, align 4
+  ; <4 x float> is XMM
+  load <5 x float>, <5 x float>* undef, align 4
+  load <6 x float>, <6 x float>* undef, align 4
+  load <7 x float>, <7 x float>* undef, align 4
+  ; <8 x float> is YMM
+  load <9 x float>, <9 x float>* undef, align 4
+  load <10 x float>, <10 x float>* undef, align 4
+  load <11 x float>, <11 x float>* undef, align 4
+  load <12 x float>, <12 x float>* undef, align 4
+  load <13 x float>, <13 x float>* undef, align 4
+  load <14 x float>, <14 x float>* undef, align 4
+  load <15 x float>, <15 x float>* undef, align 4
+  ; <16 x float> is ZMM
+
+  ; Partial vectors with i16 elements
+  load <1 x i16>, <1 x i16>* undef, align 4
+  load <2 x i16>, <2 x i16>* undef, align 4
+  load <3 x i16>, <3 x i16>* undef, align 4
+  load <4 x i16>, <4 x i16>* undef, align 4
   load <5 x i16>, <5 x i16>* undef, align 4
-  load <6 x i16>, <6 x i16>* undef, align 4 ; 1 x i64 + 1 x i32
+  load <6 x i16>, <6 x i16>* undef, align 4
   load <7 x i16>, <7 x i16>* undef, align 4
-
+  ; <8 x i16> is XMM
+  load <9 x i16>, <9 x i16>* undef, align 4
+  load <10 x i16>, <10 x i16>* undef, align 4
   load <11 x i16>, <11 x i16>* undef, align 4
-  load <12 x i16>, <12 x i16>* undef, align 4 ; 1 x i128 + 1 x i64
+  load <12 x i16>, <12 x i16>* undef, align 4
   load <13 x i16>, <13 x i16>* undef, align 4
-
+  load <14 x i16>, <14 x i16>* undef, align 4
+  load <15 x i16>, <15 x i16>* undef, align 4
+  ; <16 x i16> is YMM
+  load <17 x i16>, <17 x i16>* undef, align 4
+  load <18 x i16>, <18 x i16>* undef, align 4
+  load <19 x i16>, <19 x i16>* undef, align 4
+  load <20 x i16>, <20 x i16>* undef, align 4
+  load <21 x i16>, <21 x i16>* undef, align 4
+  load <22 x i16>, <22 x i16>* undef, align 4
   load <23 x i16>, <23 x i16>* undef, align 4
-  load <24 x i16>, <24 x i16>* undef, align 4 ; 1 x i256 + 1 x i128
+  load <24 x i16>, <24 x i16>* undef, align 4
   load <25 x i16>, <25 x i16>* undef, align 4
+  load <26 x i16>, <26 x i16>* undef, align 4
+  load <27 x i16>, <27 x i16>* undef, align 4
+  load <28 x i16>, <28 x i16>* undef, align 4
+  load <29 x i16>, <29 x i16>* undef, align 4
+  load <30 x i16>, <30 x i16>* undef, align 4
+  load <31 x i16>, <31 x i16>* undef, align 4
+  ; <32 x i16> is ZMM
+
+  ; Partial vectors with i8 elements
+  load <1 x i8>, <1 x i8>* undef, align 4
+  load <2 x i8>, <2 x i8>* undef, align 4
+  load <3 x i8>, <3 x i8>* undef, align 4
+  load <4 x i8>, <4 x i8>* undef, align 4
+  load <5 x i8>, <5 x i8>* undef, align 4
+  load <6 x i8>, <6 x i8>* undef, align 4
+  load <7 x i8>, <7 x i8>* undef, align 4
+  load <8 x i8>, <8 x i8>* undef, align 4
+  load <9 x i8>, <9 x i8>* undef, align 4
+  load <10 x i8>, <10 x i8>* undef, align 4
+  load <11 x i8>, <11 x i8>* undef, align 4
+  load <12 x i8>, <12 x i8>* undef, align 4
+  load <13 x i8>, <13 x i8>* undef, align 4
+  load <14 x i8>, <14 x i8>* undef, align 4
+  load <15 x i8>, <15 x i8>* undef, align 4
+  ; <16 x i8> is XMM
+  load <17 x i8>, <17 x i8>* undef, align 4
+  load <18 x i8>, <18 x i8>* undef, align 4
+  load <19 x i8>, <19 x i8>* undef, align 4
+  load <20 x i8>, <20 x i8>* undef, align 4
+  load <21 x i8>, <21 x i8>* undef, align 4
+  load <22 x i8>, <22 x i8>* undef, align 4
+  load <23 x i8>, <23 x i8>* undef, align 4
+  load <24 x i8>, <24 x i8>* undef, align 4
+  load <25 x i8>, <25 x i8>* undef, align 4
+  load <26 x i8>, <26 x i8>* undef, align 4
+  load <27 x i8>, <27 x i8>* undef, align 4
+  load <28 x i8>, <28 x i8>* undef, align 4
+  load <29 x i8>, <29 x i8>* undef, align 4
+  load <30 x i8>, <30 x i8>* undef, align 4
+  load <31 x i8>, <31 x i8>* undef, align 4
+  ; <32 x i8> is YMM
+  load <33 x i8>, <33 x i8>* undef, align 4
+  load <34 x i8>, <34 x i8>* undef, align 4
+  load <35 x i8>, <35 x i8>* undef, align 4
+  load <36 x i8>, <36 x i8>* undef, align 4
+  load <37 x i8>, <37 x i8>* undef, align 4
+  load <38 x i8>, <38 x i8>* undef, align 4
+  load <39 x i8>, <39 x i8>* undef, align 4
+  load <40 x i8>, <40 x i8>* undef, align 4
+  load <41 x i8>, <41 x i8>* undef, align 4
+  load <42 x i8>, <42 x i8>* undef, align 4
+  load <43 x i8>, <43 x i8>* undef, align 4
+  load <44 x i8>, <44 x i8>* undef, align 4
+  load <45 x i8>, <45 x i8>* undef, align 4
+  load <46 x i8>, <46 x i8>* undef, align 4
+  load <47 x i8>, <47 x i8>* undef, align 4
+  load <48 x i8>, <48 x i8>* undef, align 4
+  load <49 x i8>, <49 x i8>* undef, align 4
+  load <50 x i8>, <50 x i8>* undef, align 4
+  load <51 x i8>, <51 x i8>* undef, align 4
+  load <52 x i8>, <52 x i8>* undef, align 4
+  load <53 x i8>, <53 x i8>* undef, align 4
+  load <54 x i8>, <54 x i8>* undef, align 4
+  load <55 x i8>, <55 x i8>* undef, align 4
+  load <56 x i8>, <56 x i8>* undef, align 4
+  load <57 x i8>, <57 x i8>* undef, align 4
+  load <58 x i8>, <58 x i8>* undef, align 4
+  load <59 x i8>, <59 x i8>* undef, align 4
+  load <60 x i8>, <60 x i8>* undef, align 4
+  load <61 x i8>, <61 x i8>* undef, align 4
+  load <62 x i8>, <62 x i8>* undef, align 4
+  load <63 x i8>, <63 x i8>* undef, align 4
+  ; <64 x i8> is ZMM
+
+  ret i32 undef
+}
+
+define i32 @loads_align1(i32 %arg) {
+  ; Scalars
+; SSE2-LABEL: 'loads_align1'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <16 x i8>, <16 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <8 x i16>, <8 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <4 x i32>, <4 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i64>, <2 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <1 x i128>, <1 x i128>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x float>, <4 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x double>, <2 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x i8*>, <2 x i8*>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = load <32 x i8>, <32 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = load <16 x i16>, <16 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <8 x i32>, <8 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <4 x i64>, <4 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x i128>, <2 x i128>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = load <1 x i256>, <1 x i256>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = load <8 x float>, <8 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %26 = load <4 x double>, <4 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = load <4 x i8*>, <4 x i8*>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %28 = load <64 x i8>, <64 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = load <32 x i16>, <32 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = load <16 x i32>, <16 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = load <8 x i64>, <8 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = load <4 x i128>, <4 x i128>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %33 = load <2 x i256>, <2 x i256>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = load <1 x i512>, <1 x i512>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %35 = load <16 x float>, <16 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %36 = load <8 x double>, <8 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %37 = load <8 x i8*>, <8 x i8*>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'loads_align1'
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <16 x i8>, <16 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <8 x i16>, <8 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <4 x i32>, <4 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i64>, <2 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <1 x i128>, <1 x i128>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x float>, <4 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x double>, <2 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x i8*>, <2 x i8*>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = load <32 x i8>, <32 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = load <16 x i16>, <16 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <8 x i32>, <8 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <4 x i64>, <4 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x i128>, <2 x i128>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = load <1 x i256>, <1 x i256>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = load <8 x float>, <8 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %26 = load <4 x double>, <4 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = load <4 x i8*>, <4 x i8*>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %28 = load <64 x i8>, <64 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = load <32 x i16>, <32 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = load <16 x i32>, <16 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = load <8 x i64>, <8 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = load <4 x i128>, <4 x i128>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %33 = load <2 x i256>, <2 x i256>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = load <1 x i512>, <1 x i512>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %35 = load <16 x float>, <16 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %36 = load <8 x double>, <8 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %37 = load <8 x i8*>, <8 x i8*>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'loads_align1'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <16 x i8>, <16 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <8 x i16>, <8 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <4 x i32>, <4 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i64>, <2 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <1 x i128>, <1 x i128>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x float>, <4 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x double>, <2 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x i8*>, <2 x i8*>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <32 x i8>, <32 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <16 x i16>, <16 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i32>, <8 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x i64>, <4 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x i128>, <2 x i128>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = load <1 x i256>, <1 x i256>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <8 x float>, <8 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = load <4 x double>, <4 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = load <4 x i8*>, <4 x i8*>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <64 x i8>, <64 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %29 = load <32 x i16>, <32 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %30 = load <16 x i32>, <16 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = load <8 x i64>, <8 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = load <4 x i128>, <4 x i128>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %33 = load <2 x i256>, <2 x i256>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = load <1 x i512>, <1 x i512>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %35 = load <16 x float>, <16 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %36 = load <8 x double>, <8 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %37 = load <8 x i8*>, <8 x i8*>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'loads_align1'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <16 x i8>, <16 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <8 x i16>, <8 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <4 x i32>, <4 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i64>, <2 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <1 x i128>, <1 x i128>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x float>, <4 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x double>, <2 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x i8*>, <2 x i8*>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <32 x i8>, <32 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <16 x i16>, <16 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i32>, <8 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x i64>, <4 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x i128>, <2 x i128>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = load <1 x i256>, <1 x i256>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <8 x float>, <8 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = load <4 x double>, <4 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = load <4 x i8*>, <4 x i8*>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = load <64 x i8>, <64 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %29 = load <32 x i16>, <32 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %30 = load <16 x i32>, <16 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %31 = load <8 x i64>, <8 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = load <4 x i128>, <4 x i128>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %33 = load <2 x i256>, <2 x i256>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = load <1 x i512>, <1 x i512>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %35 = load <16 x float>, <16 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %36 = load <8 x double>, <8 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <8 x i8*>, <8 x i8*>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+  load i8, i8* undef, align 1
+  load i16, i16* undef, align 1
+  load i32, i32* undef, align 1
+  load i64, i64* undef, align 1
+  load i128, i128* undef, align 1
+  load i256, i256* undef, align 1
+  load i512, i512* undef, align 1
+
+  load float, float* undef, align 1
+  load double, double* undef, align 1
+
+  load i8*, i8** undef, align 1
+
+  ; XMM (128-bit) vectors
+  load <16 x i8>, <16 x i8>* undef, align 1
+  load <8 x i16>, <8 x i16>* undef, align 1
+  load <4 x i32>, <4 x i32>* undef, align 1
+  load <2 x i64>, <2 x i64>* undef, align 1
+  load <1 x i128>, <1 x i128>* undef, align 1
+
+  load <4 x float>, <4 x float>* undef, align 1
+  load <2 x double>, <2 x double>* undef, align 1
+
+  load <2 x i8*>, <2 x i8*>* undef, align 1
+
+  ; YMM (256-bit) vectors
+  load <32 x i8>, <32 x i8>* undef, align 1
+  load <16 x i16>, <16 x i16>* undef, align 1
+  load <8 x i32>, <8 x i32>* undef, align 1
+  load <4 x i64>, <4 x i64>* undef, align 1
+  load <2 x i128>, <2 x i128>* undef, align 1
+  load <1 x i256>, <1 x i256>* undef, align 1
+
+  load <8 x float>, <8 x float>* undef, align 1
+  load <4 x double>, <4 x double>* undef, align 1
+
+  load <4 x i8*>, <4 x i8*>* undef, align 1
+
+  ; ZMM (512-bit) vectors
+  load <64 x i8>, <64 x i8>* undef, align 1
+  load <32 x i16>, <32 x i16>* undef, align 1
+  load <16 x i32>, <16 x i32>* undef, align 1
+  load <8 x i64>, <8 x i64>* undef, align 1
+  load <4 x i128>, <4 x i128>* undef, align 1
+  load <2 x i256>, <2 x i256>* undef, align 1
+  load <1 x i512>, <1 x i512>* undef, align 1
+
+  load <16 x float>, <16 x float>* undef, align 1
+  load <8 x double>, <8 x double>* undef, align 1
+
+  load <8 x i8*>, <8 x i8*>* undef, align 1
+
+  ret i32 undef
+}
+
+define i32 @loads_partial_align1(i32 %arg) {
+  ; Partial vectors with i64 elements (doubles as pointer-sized tests))
+; SSE2-LABEL: 'loads_partial_align1'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 1
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'loads_partial_align1'
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 1
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'loads_partial_align1'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 1
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'loads_partial_align1'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 1
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+  load <1 x i64>, <1 x i64>* undef, align 1
+  ; <2 x i64> is XMM
+  load <3 x i64>, <3 x i64>* undef, align 1
+  ; <4 x i64> is YMM
+  load <5 x i64>, <5 x i64>* undef, align 1
+  load <6 x i64>, <6 x i64>* undef, align 1
+  load <7 x i64>, <7 x i64>* undef, align 1
+  ; <8 x i64> is ZMM
+
+  ; Partial vectors with double elements
+  load <1 x double>, <1 x double>* undef, align 1
+  ; <2 x double> is XMM
+  load <3 x double>, <3 x double>* undef, align 1
+  ; <4 x double> is YMM
+  load <5 x double>, <5 x double>* undef, align 1
+  load <6 x double>, <6 x double>* undef, align 1
+  load <7 x double>, <7 x double>* undef, align 1
+  ; <8 x double> is ZMM
+
+  ; Partial vectors with i32 elements
+  load <1 x i32>, <1 x i32>* undef, align 1
+  load <2 x i32>, <2 x i32>* undef, align 1
+  load <3 x i32>, <3 x i32>* undef, align 1
+  ; <4 x i32> is XMM
+  load <5 x i32>, <5 x i32>* undef, align 1
+  load <6 x i32>, <6 x i32>* undef, align 1
+  load <7 x i32>, <7 x i32>* undef, align 1
+  ; <8 x i32> is YMM
+  load <9 x i32>, <9 x i32>* undef, align 1
+  load <10 x i32>, <10 x i32>* undef, align 1
+  load <11 x i32>, <11 x i32>* undef, align 1
+  load <12 x i32>, <12 x i32>* undef, align 1
+  load <13 x i32>, <13 x i32>* undef, align 1
+  load <14 x i32>, <14 x i32>* undef, align 1
+  load <15 x i32>, <15 x i32>* undef, align 1
+  ; <16 x i32> is ZMM
+
+  ; Partial vectors with float elements
+  load <1 x float>, <1 x float>* undef, align 1
+  load <2 x float>, <2 x float>* undef, align 1
+  load <3 x float>, <3 x float>* undef, align 1
+  ; <4 x float> is XMM
+  load <5 x float>, <5 x float>* undef, align 1
+  load <6 x float>, <6 x float>* undef, align 1
+  load <7 x float>, <7 x float>* undef, align 1
+  ; <8 x float> is YMM
+  load <9 x float>, <9 x float>* undef, align 1
+  load <10 x float>, <10 x float>* undef, align 1
+  load <11 x float>, <11 x float>* undef, align 1
+  load <12 x float>, <12 x float>* undef, align 1
+  load <13 x float>, <13 x float>* undef, align 1
+  load <14 x float>, <14 x float>* undef, align 1
+  load <15 x float>, <15 x float>* undef, align 1
+  ; <16 x float> is ZMM
+
+  ; Partial vectors with i16 elements
+  load <1 x i16>, <1 x i16>* undef, align 1
+  load <2 x i16>, <2 x i16>* undef, align 1
+  load <3 x i16>, <3 x i16>* undef, align 1
+  load <4 x i16>, <4 x i16>* undef, align 1
+  load <5 x i16>, <5 x i16>* undef, align 1
+  load <6 x i16>, <6 x i16>* undef, align 1
+  load <7 x i16>, <7 x i16>* undef, align 1
+  ; <8 x i16> is XMM
+  load <9 x i16>, <9 x i16>* undef, align 1
+  load <10 x i16>, <10 x i16>* undef, align 1
+  load <11 x i16>, <11 x i16>* undef, align 1
+  load <12 x i16>, <12 x i16>* undef, align 1
+  load <13 x i16>, <13 x i16>* undef, align 1
+  load <14 x i16>, <14 x i16>* undef, align 1
+  load <15 x i16>, <15 x i16>* undef, align 1
+  ; <16 x i16> is YMM
+  load <17 x i16>, <17 x i16>* undef, align 1
+  load <18 x i16>, <18 x i16>* undef, align 1
+  load <19 x i16>, <19 x i16>* undef, align 1
+  load <20 x i16>, <20 x i16>* undef, align 1
+  load <21 x i16>, <21 x i16>* undef, align 1
+  load <22 x i16>, <22 x i16>* undef, align 1
+  load <23 x i16>, <23 x i16>* undef, align 1
+  load <24 x i16>, <24 x i16>* undef, align 1
+  load <25 x i16>, <25 x i16>* undef, align 1
+  load <26 x i16>, <26 x i16>* undef, align 1
+  load <27 x i16>, <27 x i16>* undef, align 1
+  load <28 x i16>, <28 x i16>* undef, align 1
+  load <29 x i16>, <29 x i16>* undef, align 1
+  load <30 x i16>, <30 x i16>* undef, align 1
+  load <31 x i16>, <31 x i16>* undef, align 1
+  ; <32 x i16> is ZMM
 
-  load <47 x i16>, <47 x i16>* undef, align 4
-  load <48 x i16>, <48 x i16>* undef, align 4 ; 3 x i256
-  load <49 x i16>, <49 x i16>* undef, align 4
+  ; Partial vectors with i8 elements
+  load <1 x i8>, <1 x i8>* undef, align 1
+  load <2 x i8>, <2 x i8>* undef, align 1
+  load <3 x i8>, <3 x i8>* undef, align 1
+  load <4 x i8>, <4 x i8>* undef, align 1
+  load <5 x i8>, <5 x i8>* undef, align 1
+  load <6 x i8>, <6 x i8>* undef, align 1
+  load <7 x i8>, <7 x i8>* undef, align 1
+  load <8 x i8>, <8 x i8>* undef, align 1
+  load <9 x i8>, <9 x i8>* undef, align 1
+  load <10 x i8>, <10 x i8>* undef, align 1
+  load <11 x i8>, <11 x i8>* undef, align 1
+  load <12 x i8>, <12 x i8>* undef, align 1
+  load <13 x i8>, <13 x i8>* undef, align 1
+  load <14 x i8>, <14 x i8>* undef, align 1
+  load <15 x i8>, <15 x i8>* undef, align 1
+  ; <16 x i8> is XMM
+  load <17 x i8>, <17 x i8>* undef, align 1
+  load <18 x i8>, <18 x i8>* undef, align 1
+  load <19 x i8>, <19 x i8>* undef, align 1
+  load <20 x i8>, <20 x i8>* undef, align 1
+  load <21 x i8>, <21 x i8>* undef, align 1
+  load <22 x i8>, <22 x i8>* undef, align 1
+  load <23 x i8>, <23 x i8>* undef, align 1
+  load <24 x i8>, <24 x i8>* undef, align 1
+  load <25 x i8>, <25 x i8>* undef, align 1
+  load <26 x i8>, <26 x i8>* undef, align 1
+  load <27 x i8>, <27 x i8>* undef, align 1
+  load <28 x i8>, <28 x i8>* undef, align 1
+  load <29 x i8>, <29 x i8>* undef, align 1
+  load <30 x i8>, <30 x i8>* undef, align 1
+  load <31 x i8>, <31 x i8>* undef, align 1
+  ; <32 x i8> is YMM
+  load <33 x i8>, <33 x i8>* undef, align 1
+  load <34 x i8>, <34 x i8>* undef, align 1
+  load <35 x i8>, <35 x i8>* undef, align 1
+  load <36 x i8>, <36 x i8>* undef, align 1
+  load <37 x i8>, <37 x i8>* undef, align 1
+  load <38 x i8>, <38 x i8>* undef, align 1
+  load <39 x i8>, <39 x i8>* undef, align 1
+  load <40 x i8>, <40 x i8>* undef, align 1
+  load <41 x i8>, <41 x i8>* undef, align 1
+  load <42 x i8>, <42 x i8>* undef, align 1
+  load <43 x i8>, <43 x i8>* undef, align 1
+  load <44 x i8>, <44 x i8>* undef, align 1
+  load <45 x i8>, <45 x i8>* undef, align 1
+  load <46 x i8>, <46 x i8>* undef, align 1
+  load <47 x i8>, <47 x i8>* undef, align 1
+  load <48 x i8>, <48 x i8>* undef, align 1
+  load <49 x i8>, <49 x i8>* undef, align 1
+  load <50 x i8>, <50 x i8>* undef, align 1
+  load <51 x i8>, <51 x i8>* undef, align 1
+  load <52 x i8>, <52 x i8>* undef, align 1
+  load <53 x i8>, <53 x i8>* undef, align 1
+  load <54 x i8>, <54 x i8>* undef, align 1
+  load <55 x i8>, <55 x i8>* undef, align 1
+  load <56 x i8>, <56 x i8>* undef, align 1
+  load <57 x i8>, <57 x i8>* undef, align 1
+  load <58 x i8>, <58 x i8>* undef, align 1
+  load <59 x i8>, <59 x i8>* undef, align 1
+  load <60 x i8>, <60 x i8>* undef, align 1
+  load <61 x i8>, <61 x i8>* undef, align 1
+  load <62 x i8>, <62 x i8>* undef, align 1
+  load <63 x i8>, <63 x i8>* undef, align 1
+  ; <64 x i8> is ZMM
 
   ret i32 undef
 }
 
-define i32 @loads_align(i32 %arg) {
-; SSE-LABEL: 'loads_align'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <2 x i32>, <2 x i32>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = load <4 x i32>, <4 x i32>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = load <8 x i32>, <8 x i32>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64
-; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX-LABEL: 'loads_align'
+define i32 @loads_align64(i32 %arg) {
+  ; Scalars
+; SSE2-LABEL: 'loads_align64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <16 x i8>, <16 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <8 x i16>, <8 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <4 x i32>, <4 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i64>, <2 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <1 x i128>, <1 x i128>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x float>, <4 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x double>, <2 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x i8*>, <2 x i8*>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = load <32 x i8>, <32 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = load <16 x i16>, <16 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <8 x i32>, <8 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <4 x i64>, <4 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x i128>, <2 x i128>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = load <1 x i256>, <1 x i256>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = load <8 x float>, <8 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %26 = load <4 x double>, <4 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = load <4 x i8*>, <4 x i8*>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %28 = load <64 x i8>, <64 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = load <32 x i16>, <32 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = load <16 x i32>, <16 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = load <8 x i64>, <8 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = load <4 x i128>, <4 x i128>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %33 = load <2 x i256>, <2 x i256>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = load <1 x i512>, <1 x i512>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %35 = load <16 x float>, <16 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %36 = load <8 x double>, <8 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %37 = load <8 x i8*>, <8 x i8*>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'loads_align64'
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <16 x i8>, <16 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <8 x i16>, <8 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <4 x i32>, <4 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i64>, <2 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <1 x i128>, <1 x i128>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x float>, <4 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x double>, <2 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x i8*>, <2 x i8*>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = load <32 x i8>, <32 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = load <16 x i16>, <16 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <8 x i32>, <8 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <4 x i64>, <4 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x i128>, <2 x i128>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = load <1 x i256>, <1 x i256>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = load <8 x float>, <8 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %26 = load <4 x double>, <4 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = load <4 x i8*>, <4 x i8*>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %28 = load <64 x i8>, <64 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = load <32 x i16>, <32 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = load <16 x i32>, <16 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = load <8 x i64>, <8 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = load <4 x i128>, <4 x i128>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %33 = load <2 x i256>, <2 x i256>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = load <1 x i512>, <1 x i512>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %35 = load <16 x float>, <16 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %36 = load <8 x double>, <8 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %37 = load <8 x i8*>, <8 x i8*>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'loads_align64'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <2 x i32>, <2 x i32>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = load <4 x i32>, <4 x i32>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load <8 x i32>, <8 x i32>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <16 x i8>, <16 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <8 x i16>, <8 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <4 x i32>, <4 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i64>, <2 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <1 x i128>, <1 x i128>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x float>, <4 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x double>, <2 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x i8*>, <2 x i8*>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <32 x i8>, <32 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <16 x i16>, <16 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i32>, <8 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x i64>, <4 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x i128>, <2 x i128>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = load <1 x i256>, <1 x i256>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <8 x float>, <8 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = load <4 x double>, <4 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = load <4 x i8*>, <4 x i8*>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <64 x i8>, <64 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %29 = load <32 x i16>, <32 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %30 = load <16 x i32>, <16 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = load <8 x i64>, <8 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = load <4 x i128>, <4 x i128>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %33 = load <2 x i256>, <2 x i256>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = load <1 x i512>, <1 x i512>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %35 = load <16 x float>, <16 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %36 = load <8 x double>, <8 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %37 = load <8 x i8*>, <8 x i8*>* undef, align 64
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; AVX512-LABEL: 'loads_align'
+; AVX512-LABEL: 'loads_align64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <2 x i32>, <2 x i32>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = load <4 x i32>, <4 x i32>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load <8 x i32>, <8 x i32>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <16 x i8>, <16 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <8 x i16>, <8 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = load <4 x i32>, <4 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i64>, <2 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <1 x i128>, <1 x i128>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x float>, <4 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x double>, <2 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x i8*>, <2 x i8*>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = load <32 x i8>, <32 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = load <16 x i16>, <16 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i32>, <8 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x i64>, <4 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x i128>, <2 x i128>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = load <1 x i256>, <1 x i256>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <8 x float>, <8 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = load <4 x double>, <4 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = load <4 x i8*>, <4 x i8*>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = load <64 x i8>, <64 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %29 = load <32 x i16>, <32 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %30 = load <16 x i32>, <16 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %31 = load <8 x i64>, <8 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = load <4 x i128>, <4 x i128>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %33 = load <2 x i256>, <2 x i256>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = load <1 x i512>, <1 x i512>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %35 = load <16 x float>, <16 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %36 = load <8 x double>, <8 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <8 x i8*>, <8 x i8*>* undef, align 64
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   load i8, i8* undef, align 64
@@ -522,38 +4639,728 @@ define i32 @loads_align(i32 %arg) {
   load i32, i32* undef, align 64
   load i64, i64* undef, align 64
   load i128, i128* undef, align 64
+  load i256, i256* undef, align 64
+  load i512, i512* undef, align 64
 
-  load <2 x i32>, <2 x i32>* undef, align 64
-  load <4 x i32>, <4 x i32>* undef, align 64
-  load <8 x i32>, <8 x i32>* undef, align 64
+  load float, float* undef, align 64
+  load double, double* undef, align 64
 
+  load i8*, i8** undef, align 64
+
+  ; XMM (128-bit) vectors
+  load <16 x i8>, <16 x i8>* undef, align 64
+  load <8 x i16>, <8 x i16>* undef, align 64
+  load <4 x i32>, <4 x i32>* undef, align 64
   load <2 x i64>, <2 x i64>* undef, align 64
+  load <1 x i128>, <1 x i128>* undef, align 64
+
+  load <4 x float>, <4 x float>* undef, align 64
+  load <2 x double>, <2 x double>* undef, align 64
+
+  load <2 x i8*>, <2 x i8*>* undef, align 64
+
+  ; YMM (256-bit) vectors
+  load <32 x i8>, <32 x i8>* undef, align 64
+  load <16 x i16>, <16 x i16>* undef, align 64
+  load <8 x i32>, <8 x i32>* undef, align 64
   load <4 x i64>, <4 x i64>* undef, align 64
+  load <2 x i128>, <2 x i128>* undef, align 64
+  load <1 x i256>, <1 x i256>* undef, align 64
+
+  load <8 x float>, <8 x float>* undef, align 64
+  load <4 x double>, <4 x double>* undef, align 64
+
+  load <4 x i8*>, <4 x i8*>* undef, align 64
+
+  ; ZMM (512-bit) vectors
+  load <64 x i8>, <64 x i8>* undef, align 64
+  load <32 x i16>, <32 x i16>* undef, align 64
+  load <16 x i32>, <16 x i32>* undef, align 64
   load <8 x i64>, <8 x i64>* undef, align 64
+  load <4 x i128>, <4 x i128>* undef, align 64
+  load <2 x i256>, <2 x i256>* undef, align 64
+  load <1 x i512>, <1 x i512>* undef, align 64
 
-  load <3 x float>, <3 x float>* undef, align 64
+  load <16 x float>, <16 x float>* undef, align 64
+  load <8 x double>, <8 x double>* undef, align 64
+
+  load <8 x i8*>, <8 x i8*>* undef, align 64
+
+  ret i32 undef
+}
+
+define i32 @loads_partial_align64(i32 %arg) {
+  ; Partial vectors with i64 elements (doubles as pointer-sized tests))
+; SSE2-LABEL: 'loads_partial_align64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'loads_partial_align64'
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 64
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'loads_partial_align64'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 64
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'loads_partial_align64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 64
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+  load <1 x i64>, <1 x i64>* undef, align 64
+  ; <2 x i64> is XMM
+  load <3 x i64>, <3 x i64>* undef, align 64
+  ; <4 x i64> is YMM
+  load <5 x i64>, <5 x i64>* undef, align 64
+  load <6 x i64>, <6 x i64>* undef, align 64
+  load <7 x i64>, <7 x i64>* undef, align 64
+  ; <8 x i64> is ZMM
+
+  ; Partial vectors with double elements
+  load <1 x double>, <1 x double>* undef, align 64
+  ; <2 x double> is XMM
   load <3 x double>, <3 x double>* undef, align 64
+  ; <4 x double> is YMM
+  load <5 x double>, <5 x double>* undef, align 64
+  load <6 x double>, <6 x double>* undef, align 64
+  load <7 x double>, <7 x double>* undef, align 64
+  ; <8 x double> is ZMM
 
+  ; Partial vectors with i32 elements
+  load <1 x i32>, <1 x i32>* undef, align 64
+  load <2 x i32>, <2 x i32>* undef, align 64
   load <3 x i32>, <3 x i32>* undef, align 64
-  load <3 x i64>, <3 x i64>* undef, align 64
+  ; <4 x i32> is XMM
   load <5 x i32>, <5 x i32>* undef, align 64
-  load <5 x i64>, <5 x i64>* undef, align 64
+  load <6 x i32>, <6 x i32>* undef, align 64
+  load <7 x i32>, <7 x i32>* undef, align 64
+  ; <8 x i32> is YMM
+  load <9 x i32>, <9 x i32>* undef, align 64
+  load <10 x i32>, <10 x i32>* undef, align 64
+  load <11 x i32>, <11 x i32>* undef, align 64
+  load <12 x i32>, <12 x i32>* undef, align 64
+  load <13 x i32>, <13 x i32>* undef, align 64
+  load <14 x i32>, <14 x i32>* undef, align 64
+  load <15 x i32>, <15 x i32>* undef, align 64
+  ; <16 x i32> is ZMM
+
+  ; Partial vectors with float elements
+  load <1 x float>, <1 x float>* undef, align 64
+  load <2 x float>, <2 x float>* undef, align 64
+  load <3 x float>, <3 x float>* undef, align 64
+  ; <4 x float> is XMM
+  load <5 x float>, <5 x float>* undef, align 64
+  load <6 x float>, <6 x float>* undef, align 64
+  load <7 x float>, <7 x float>* undef, align 64
+  ; <8 x float> is YMM
+  load <9 x float>, <9 x float>* undef, align 64
+  load <10 x float>, <10 x float>* undef, align 64
+  load <11 x float>, <11 x float>* undef, align 64
+  load <12 x float>, <12 x float>* undef, align 64
+  load <13 x float>, <13 x float>* undef, align 64
+  load <14 x float>, <14 x float>* undef, align 64
+  load <15 x float>, <15 x float>* undef, align 64
+  ; <16 x float> is ZMM
 
+  ; Partial vectors with i16 elements
+  load <1 x i16>, <1 x i16>* undef, align 64
+  load <2 x i16>, <2 x i16>* undef, align 64
+  load <3 x i16>, <3 x i16>* undef, align 64
+  load <4 x i16>, <4 x i16>* undef, align 64
   load <5 x i16>, <5 x i16>* undef, align 64
-  load <6 x i16>, <6 x i16>* undef, align 64 ; 1 x i64 + 1 x i32
+  load <6 x i16>, <6 x i16>* undef, align 64
   load <7 x i16>, <7 x i16>* undef, align 64
-
+  ; <8 x i16> is XMM
+  load <9 x i16>, <9 x i16>* undef, align 64
+  load <10 x i16>, <10 x i16>* undef, align 64
   load <11 x i16>, <11 x i16>* undef, align 64
-  load <12 x i16>, <12 x i16>* undef, align 64 ; 1 x i128 + 1 x i64
+  load <12 x i16>, <12 x i16>* undef, align 64
   load <13 x i16>, <13 x i16>* undef, align 64
-
+  load <14 x i16>, <14 x i16>* undef, align 64
+  load <15 x i16>, <15 x i16>* undef, align 64
+  ; <16 x i16> is YMM
+  load <17 x i16>, <17 x i16>* undef, align 64
+  load <18 x i16>, <18 x i16>* undef, align 64
+  load <19 x i16>, <19 x i16>* undef, align 64
+  load <20 x i16>, <20 x i16>* undef, align 64
+  load <21 x i16>, <21 x i16>* undef, align 64
+  load <22 x i16>, <22 x i16>* undef, align 64
   load <23 x i16>, <23 x i16>* undef, align 64
-  load <24 x i16>, <24 x i16>* undef, align 64 ; 1 x i256 + 1 x i128
+  load <24 x i16>, <24 x i16>* undef, align 64
   load <25 x i16>, <25 x i16>* undef, align 64
+  load <26 x i16>, <26 x i16>* undef, align 64
+  load <27 x i16>, <27 x i16>* undef, align 64
+  load <28 x i16>, <28 x i16>* undef, align 64
+  load <29 x i16>, <29 x i16>* undef, align 64
+  load <30 x i16>, <30 x i16>* undef, align 64
+  load <31 x i16>, <31 x i16>* undef, align 64
+  ; <32 x i16> is ZMM
 
-  load <47 x i16>, <47 x i16>* undef, align 64
-  load <48 x i16>, <48 x i16>* undef, align 64 ; 3 x i256
-  load <49 x i16>, <49 x i16>* undef, align 64
+  ; Partial vectors with i8 elements
+  load <1 x i8>, <1 x i8>* undef, align 64
+  load <2 x i8>, <2 x i8>* undef, align 64
+  load <3 x i8>, <3 x i8>* undef, align 64
+  load <4 x i8>, <4 x i8>* undef, align 64
+  load <5 x i8>, <5 x i8>* undef, align 64
+  load <6 x i8>, <6 x i8>* undef, align 64
+  load <7 x i8>, <7 x i8>* undef, align 64
+  load <8 x i8>, <8 x i8>* undef, align 64
+  load <9 x i8>, <9 x i8>* undef, align 64
+  load <10 x i8>, <10 x i8>* undef, align 64
+  load <11 x i8>, <11 x i8>* undef, align 64
+  load <12 x i8>, <12 x i8>* undef, align 64
+  load <13 x i8>, <13 x i8>* undef, align 64
+  load <14 x i8>, <14 x i8>* undef, align 64
+  load <15 x i8>, <15 x i8>* undef, align 64
+  ; <16 x i8> is XMM
+  load <17 x i8>, <17 x i8>* undef, align 64
+  load <18 x i8>, <18 x i8>* undef, align 64
+  load <19 x i8>, <19 x i8>* undef, align 64
+  load <20 x i8>, <20 x i8>* undef, align 64
+  load <21 x i8>, <21 x i8>* undef, align 64
+  load <22 x i8>, <22 x i8>* undef, align 64
+  load <23 x i8>, <23 x i8>* undef, align 64
+  load <24 x i8>, <24 x i8>* undef, align 64
+  load <25 x i8>, <25 x i8>* undef, align 64
+  load <26 x i8>, <26 x i8>* undef, align 64
+  load <27 x i8>, <27 x i8>* undef, align 64
+  load <28 x i8>, <28 x i8>* undef, align 64
+  load <29 x i8>, <29 x i8>* undef, align 64
+  load <30 x i8>, <30 x i8>* undef, align 64
+  load <31 x i8>, <31 x i8>* undef, align 64
+  ; <32 x i8> is YMM
+  load <33 x i8>, <33 x i8>* undef, align 64
+  load <34 x i8>, <34 x i8>* undef, align 64
+  load <35 x i8>, <35 x i8>* undef, align 64
+  load <36 x i8>, <36 x i8>* undef, align 64
+  load <37 x i8>, <37 x i8>* undef, align 64
+  load <38 x i8>, <38 x i8>* undef, align 64
+  load <39 x i8>, <39 x i8>* undef, align 64
+  load <40 x i8>, <40 x i8>* undef, align 64
+  load <41 x i8>, <41 x i8>* undef, align 64
+  load <42 x i8>, <42 x i8>* undef, align 64
+  load <43 x i8>, <43 x i8>* undef, align 64
+  load <44 x i8>, <44 x i8>* undef, align 64
+  load <45 x i8>, <45 x i8>* undef, align 64
+  load <46 x i8>, <46 x i8>* undef, align 64
+  load <47 x i8>, <47 x i8>* undef, align 64
+  load <48 x i8>, <48 x i8>* undef, align 64
+  load <49 x i8>, <49 x i8>* undef, align 64
+  load <50 x i8>, <50 x i8>* undef, align 64
+  load <51 x i8>, <51 x i8>* undef, align 64
+  load <52 x i8>, <52 x i8>* undef, align 64
+  load <53 x i8>, <53 x i8>* undef, align 64
+  load <54 x i8>, <54 x i8>* undef, align 64
+  load <55 x i8>, <55 x i8>* undef, align 64
+  load <56 x i8>, <56 x i8>* undef, align 64
+  load <57 x i8>, <57 x i8>* undef, align 64
+  load <58 x i8>, <58 x i8>* undef, align 64
+  load <59 x i8>, <59 x i8>* undef, align 64
+  load <60 x i8>, <60 x i8>* undef, align 64
+  load <61 x i8>, <61 x i8>* undef, align 64
+  load <62 x i8>, <62 x i8>* undef, align 64
+  load <63 x i8>, <63 x i8>* undef, align 64
+  ; <64 x i8> is ZMM
 
   ret i32 undef
 }


        


More information about the llvm-commits mailing list