[llvm] 0fa0ce1 - [CostModel][X86] Update SK_Broadcast based on cost kinds (#150620)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 26 05:52:52 PDT 2025
Author: Simon Pilgrim
Date: 2025-07-26T13:52:47+01:00
New Revision: 0fa0ce1f3af316f21e61286da732ef5c96da495b
URL: https://github.com/llvm/llvm-project/commit/0fa0ce1f3af316f21e61286da732ef5c96da495b
DIFF: https://github.com/llvm/llvm-project/commit/0fa0ce1f3af316f21e61286da732ef5c96da495b.diff
LOG: [CostModel][X86] Update SK_Broadcast based on cost kinds (#150620)
When these were converted to CostKindTblEntry the throughput was mainly copied to all cost kinds
Regenerated with my check_cost_tables.py helper script
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/fshl.ll
llvm/test/Analysis/CostModel/X86/fshr.ll
llvm/test/Analysis/CostModel/X86/load-broadcast.ll
llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll
llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll
llvm/test/Analysis/CostModel/X86/shuffle-load.ll
llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll
llvm/test/Analysis/CostModel/X86/shuffle-splat.ll
llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 37a7b37e4133a..6d2c7b4eb76de 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1838,9 +1838,9 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX512BWShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v64i8, { 1, 1, 1, 1 } }, // vpbroadcastb
+ { TTI::SK_Broadcast, MVT::v32i16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v32f16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb
{ TTI::SK_Reverse, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw
{ TTI::SK_Reverse, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw
@@ -1874,13 +1874,20 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX512ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v8f64, { 1, 1, 1, 1 } }, // vbroadcastsd
- {TTI::SK_Broadcast, MVT::v16f32, { 1, 1, 1, 1 } }, // vbroadcastss
- {TTI::SK_Broadcast, MVT::v8i64, { 1, 1, 1, 1 } }, // vpbroadcastq
- {TTI::SK_Broadcast, MVT::v16i32, { 1, 1, 1, 1 } }, // vpbroadcastd
- {TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v64i8, { 1, 1, 1, 1 } }, // vpbroadcastb
+ {TTI::SK_Broadcast, MVT::v8f64, { 1, 3, 1, 1 } }, // vbroadcastsd
+ {TTI::SK_Broadcast, MVT::v4f64, { 1, 3, 1, 1 } }, // vbroadcastsd
+ {TTI::SK_Broadcast, MVT::v16f32, { 1, 3, 1, 1 } }, // vbroadcastss
+ {TTI::SK_Broadcast, MVT::v8f32, { 1, 3, 1, 1 } }, // vbroadcastss
+ {TTI::SK_Broadcast, MVT::v8i64, { 1, 3, 1, 1 } }, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v4i64, { 1, 3, 1, 1 } }, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v16i32, { 1, 3, 1, 1 } }, // vpbroadcastd
+ {TTI::SK_Broadcast, MVT::v8i32, { 1, 3, 1, 1 } }, // vpbroadcastd
+ {TTI::SK_Broadcast, MVT::v32i16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v16i16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v32f16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v16f16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb
+ {TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 1 }}, // vpbroadcastb
{TTI::SK_Reverse, MVT::v8f64, { 1, 3, 1, 1 } }, // vpermpd
{TTI::SK_Reverse, MVT::v16f32, { 1, 3, 1, 1 } }, // vpermps
@@ -1973,13 +1980,16 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX2ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v4f64, { 1, 1, 1, 1 } }, // vbroadcastpd
- { TTI::SK_Broadcast, MVT::v8f32, { 1, 1, 1, 1 } }, // vbroadcastps
- { TTI::SK_Broadcast, MVT::v4i64, { 1, 1, 1, 1 } }, // vpbroadcastq
- { TTI::SK_Broadcast, MVT::v8i32, { 1, 1, 1, 1 } }, // vpbroadcastd
- { TTI::SK_Broadcast, MVT::v16i16, { 1, 1, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v16f16, { 1, 1, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v32i8, { 1, 1, 1, 1 } }, // vpbroadcastb
+ { TTI::SK_Broadcast, MVT::v4f64, { 1, 3, 1, 2 } }, // vbroadcastpd
+ { TTI::SK_Broadcast, MVT::v8f32, { 1, 3, 1, 2 } }, // vbroadcastps
+ { TTI::SK_Broadcast, MVT::v4i64, { 1, 3, 1, 2 } }, // vpbroadcastq
+ { TTI::SK_Broadcast, MVT::v8i32, { 1, 3, 1, 2 } }, // vpbroadcastd
+ { TTI::SK_Broadcast, MVT::v16i16, { 1, 3, 1, 2 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v8i16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v16f16, { 1, 3, 1, 2 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v8f16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 2 } }, // vpbroadcastb
+ { TTI::SK_Broadcast, MVT::v16i8, { 1, 3, 1, 1 } }, // vpbroadcastb
{ TTI::SK_Reverse, MVT::v4f64, { 1, 1, 1, 1 } }, // vpermpd
{ TTI::SK_Reverse, MVT::v8f32, { 1, 1, 1, 1 } }, // vpermps
@@ -2077,13 +2087,13 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX1ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd
- {TTI::SK_Broadcast, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps
- {TTI::SK_Broadcast, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vpermilpd
- {TTI::SK_Broadcast, MVT::v8i32, {2,2,2,2}}, // vperm2f128 + vpermilps
- {TTI::SK_Broadcast, MVT::v16i16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128
- {TTI::SK_Broadcast, MVT::v16f16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128
- {TTI::SK_Broadcast, MVT::v32i8, {2,2,2,2}}, // vpshufb + vinsertf128
+ {TTI::SK_Broadcast, MVT::v4f64, {2,3,2,3}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8f32, {2,3,2,3}}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v4i64, {2,3,2,3}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8i32, {2,3,2,3}}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v16i16, {2,3,3,4}}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast, MVT::v16f16, {2,3,3,4}}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast, MVT::v32i8, {3,4,3,6}}, // vpshufb + vinsertf128
{TTI::SK_Reverse, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd
{TTI::SK_Reverse, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps
@@ -2156,9 +2166,9 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry SSSE3ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v8i16, {1, 1, 1, 1}}, // pshufb
- {TTI::SK_Broadcast, MVT::v8f16, {1, 1, 1, 1}}, // pshufb
- {TTI::SK_Broadcast, MVT::v16i8, {1, 1, 1, 1}}, // pshufb
+ {TTI::SK_Broadcast, MVT::v8i16, {1, 3, 2, 2}}, // pshufb
+ {TTI::SK_Broadcast, MVT::v8f16, {1, 3, 2, 2}}, // pshufb
+ {TTI::SK_Broadcast, MVT::v16i8, {1, 3, 2, 2}}, // pshufb
{TTI::SK_Reverse, MVT::v8i16, {1, 1, 1, 1}}, // pshufb
{TTI::SK_Reverse, MVT::v8f16, {1, 1, 1, 1}}, // pshufb
@@ -2192,9 +2202,9 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Broadcast, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
{TTI::SK_Broadcast, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
{TTI::SK_Broadcast, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
- {TTI::SK_Broadcast, MVT::v8i16, {2, 2, 2, 2}}, // pshuflw + pshufd
- {TTI::SK_Broadcast, MVT::v8f16, {2, 2, 2, 2}}, // pshuflw + pshufd
- {TTI::SK_Broadcast, MVT::v16i8, {3, 3, 3, 3}}, // unpck + pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v8i16, {1, 2, 2, 2}}, // pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v8f16, {1, 2, 2, 2}}, // pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v16i8, {2, 3, 3, 4}}, // unpck + pshuflw + pshufd
{TTI::SK_Reverse, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
{TTI::SK_Reverse, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
diff --git a/llvm/test/Analysis/CostModel/X86/fshl.ll b/llvm/test/Analysis/CostModel/X86/fshl.ll
index 485e7f6abf965..c53a5072ff984 100644
--- a/llvm/test/Analysis/CostModel/X86/fshl.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshl.ll
@@ -414,8 +414,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX1-LABEL: 'splatvar_funnel_i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:13 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -423,8 +423,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX2-LABEL: 'splatvar_funnel_i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:11 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:14 SizeLat:14 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:14 Lat:28 SizeLat:28 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -432,8 +432,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512F-LABEL: 'splatvar_funnel_i64'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -441,8 +441,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512BW-LABEL: 'splatvar_funnel_i64'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -450,8 +450,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512DQ-LABEL: 'splatvar_funnel_i64'
; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -459,8 +459,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i64'
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -486,8 +486,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; XOP-LABEL: 'splatvar_funnel_i64'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -495,8 +495,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i64'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -532,8 +532,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX1-LABEL: 'splatvar_funnel_i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -541,8 +541,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX2-LABEL: 'splatvar_funnel_i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -550,8 +550,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512F-LABEL: 'splatvar_funnel_i32'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -559,8 +559,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512BW-LABEL: 'splatvar_funnel_i32'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -568,8 +568,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512DQ-LABEL: 'splatvar_funnel_i32'
; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -577,8 +577,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i32'
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -604,8 +604,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; XOP-LABEL: 'splatvar_funnel_i32'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -613,8 +613,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i32'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -631,108 +631,108 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) {
; SSSE3-LABEL: 'splatvar_funnel_i16'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:10 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; SSSE3-NEXT: Cost Model: Found costs of RThru:20 CodeSize:17 Lat:19 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; SSSE3-NEXT: Cost Model: Found costs of RThru:40 CodeSize:33 Lat:37 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'splatvar_funnel_i16'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; SSE42-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_funnel_i16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_funnel_i16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_funnel_i16'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_funnel_i16'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_funnel_i16'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i16'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_funnel_i16'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:7 Lat:12 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:13 Lat:23 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:25 Lat:45 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_funnel_i16'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_funnel_i16'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i16'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
@@ -749,108 +749,108 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) {
; SSSE3-LABEL: 'splatvar_funnel_i8'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:18 Lat:29 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:35 Lat:57 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; SSSE3-NEXT: Cost Model: Found costs of RThru:96 CodeSize:69 Lat:113 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'splatvar_funnel_i8'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; SSE42-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_funnel_i8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:16 Lat:18 SizeLat:22 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:36 Lat:26 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:54 CodeSize:72 Lat:52 SizeLat:94 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_funnel_i8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:20 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:21 SizeLat:27 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:42 SizeLat:54 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_funnel_i8'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_funnel_i8'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_funnel_i8'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i8'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_funnel_i8'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:26 CodeSize:16 Lat:31 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:52 CodeSize:31 Lat:61 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:104 CodeSize:61 Lat:121 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_funnel_i8'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_funnel_i8'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:9 CodeSize:7 Lat:12 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:23 CodeSize:25 Lat:23 SizeLat:31 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:46 CodeSize:50 Lat:46 SizeLat:62 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i8'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
@@ -1934,8 +1934,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX1-LABEL: 'splatvar_rotate_i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:18 SizeLat:22 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:30 Lat:36 SizeLat:44 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -1943,8 +1943,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX2-LABEL: 'splatvar_rotate_i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:11 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:22 SizeLat:20 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -1952,8 +1952,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512-LABEL: 'splatvar_rotate_i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -1979,8 +1979,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; XOP-LABEL: 'splatvar_rotate_i64'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -2007,8 +2007,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX1-LABEL: 'splatvar_rotate_i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2016,8 +2016,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX2-LABEL: 'splatvar_rotate_i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2025,8 +2025,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512-LABEL: 'splatvar_rotate_i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2052,8 +2052,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; XOP-LABEL: 'splatvar_rotate_i32'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2070,99 +2070,99 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) {
; SSE-LABEL: 'splatvar_rotate_i16'
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; SSE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; SSE-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_rotate_i16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_rotate_i16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_rotate_i16'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_rotate_i16'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:7 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_rotate_i16'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_rotate_i16'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_rotate_i16'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_rotate_i16'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_rotate_i16'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_rotate_i16'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
@@ -2179,99 +2179,99 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) {
; SSE-LABEL: 'splatvar_rotate_i8'
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; SSE-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; SSE-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_rotate_i8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:14 Lat:15 SizeLat:19 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:28 Lat:21 SizeLat:38 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:40 CodeSize:56 Lat:42 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_rotate_i8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:23 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:32 Lat:36 SizeLat:46 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_rotate_i8'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_rotate_i8'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:11 Lat:6 SizeLat:12 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_rotate_i8'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_rotate_i8'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:11 Lat:6 SizeLat:12 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_rotate_i8'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_rotate_i8'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_rotate_i8'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_rotate_i8'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:11 Lat:6 SizeLat:12 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
diff --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll
index af9a91b6d453e..1990605061716 100644
--- a/llvm/test/Analysis/CostModel/X86/fshr.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshr.ll
@@ -414,8 +414,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX1-LABEL: 'splatvar_funnel_i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:13 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -423,8 +423,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX2-LABEL: 'splatvar_funnel_i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:11 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:14 SizeLat:14 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:14 Lat:28 SizeLat:28 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -432,8 +432,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512F-LABEL: 'splatvar_funnel_i64'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -441,8 +441,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512BW-LABEL: 'splatvar_funnel_i64'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -450,8 +450,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512DQ-LABEL: 'splatvar_funnel_i64'
; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -459,8 +459,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i64'
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -486,8 +486,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; XOP-LABEL: 'splatvar_funnel_i64'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -495,8 +495,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i64'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -532,8 +532,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX1-LABEL: 'splatvar_funnel_i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -541,8 +541,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX2-LABEL: 'splatvar_funnel_i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -550,8 +550,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512F-LABEL: 'splatvar_funnel_i32'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -559,8 +559,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512BW-LABEL: 'splatvar_funnel_i32'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -568,8 +568,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512DQ-LABEL: 'splatvar_funnel_i32'
; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -577,8 +577,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i32'
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -604,8 +604,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; XOP-LABEL: 'splatvar_funnel_i32'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -613,8 +613,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i32'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -631,108 +631,108 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) {
; SSSE3-LABEL: 'splatvar_funnel_i16'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:10 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; SSSE3-NEXT: Cost Model: Found costs of RThru:20 CodeSize:17 Lat:19 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; SSSE3-NEXT: Cost Model: Found costs of RThru:40 CodeSize:33 Lat:37 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'splatvar_funnel_i16'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; SSE42-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_funnel_i16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_funnel_i16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_funnel_i16'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_funnel_i16'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_funnel_i16'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i16'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_funnel_i16'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:7 Lat:12 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:13 Lat:23 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:25 Lat:45 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_funnel_i16'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_funnel_i16'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i16'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
@@ -749,108 +749,108 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) {
; SSSE3-LABEL: 'splatvar_funnel_i8'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:18 Lat:29 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:35 Lat:57 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; SSSE3-NEXT: Cost Model: Found costs of RThru:96 CodeSize:69 Lat:113 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'splatvar_funnel_i8'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; SSE42-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_funnel_i8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:16 Lat:18 SizeLat:22 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:36 Lat:26 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:54 CodeSize:72 Lat:52 SizeLat:94 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_funnel_i8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:20 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:21 SizeLat:27 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:42 SizeLat:54 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_funnel_i8'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_funnel_i8'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_funnel_i8'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i8'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_funnel_i8'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:26 CodeSize:16 Lat:31 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:52 CodeSize:31 Lat:61 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:104 CodeSize:61 Lat:121 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_funnel_i8'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_funnel_i8'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:9 CodeSize:7 Lat:12 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:23 CodeSize:25 Lat:23 SizeLat:31 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:46 CodeSize:50 Lat:46 SizeLat:62 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i8'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
@@ -1934,8 +1934,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX1-LABEL: 'splatvar_rotate_i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:18 SizeLat:22 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:30 Lat:36 SizeLat:44 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -1943,8 +1943,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX2-LABEL: 'splatvar_rotate_i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:11 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:22 SizeLat:20 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -1952,8 +1952,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512-LABEL: 'splatvar_rotate_i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -1979,8 +1979,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; XOP-LABEL: 'splatvar_rotate_i64'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -2007,8 +2007,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX1-LABEL: 'splatvar_rotate_i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2016,8 +2016,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX2-LABEL: 'splatvar_rotate_i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2025,8 +2025,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512-LABEL: 'splatvar_rotate_i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2052,8 +2052,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; XOP-LABEL: 'splatvar_rotate_i32'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2070,99 +2070,99 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) {
; SSE-LABEL: 'splatvar_rotate_i16'
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; SSE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; SSE-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_rotate_i16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_rotate_i16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_rotate_i16'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_rotate_i16'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:7 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_rotate_i16'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_rotate_i16'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_rotate_i16'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_rotate_i16'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_rotate_i16'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_rotate_i16'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
@@ -2179,99 +2179,99 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) {
; SSE-LABEL: 'splatvar_rotate_i8'
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; SSE-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; SSE-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_rotate_i8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:14 Lat:15 SizeLat:19 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:28 Lat:21 SizeLat:38 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:40 CodeSize:56 Lat:42 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_rotate_i8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:23 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:32 Lat:36 SizeLat:46 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_rotate_i8'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_rotate_i8'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_rotate_i8'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_rotate_i8'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_rotate_i8'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_rotate_i8'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_rotate_i8'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_rotate_i8'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
diff --git a/llvm/test/Analysis/CostModel/X86/load-broadcast.ll b/llvm/test/Analysis/CostModel/X86/load-broadcast.ll
index 3c50684562f9d..ecf54ce9fe938 100644
--- a/llvm/test/Analysis/CostModel/X86/load-broadcast.ll
+++ b/llvm/test/Analysis/CostModel/X86/load-broadcast.ll
@@ -123,19 +123,19 @@ define <4 x double> @broadcast_load_f64_v4f64(ptr %src) {
; AVX1-LABEL: 'broadcast_load_f64_v4f64'
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
; AVX1-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst
;
; AVX2-LABEL: 'broadcast_load_f64_v4f64'
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
; AVX2-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst
;
; AVX512-LABEL: 'broadcast_load_f64_v4f64'
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
; AVX512-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst
;
%load = load double, ptr %src
@@ -154,19 +154,19 @@ define <8 x double> @broadcast_load_f64_v8f64(ptr %src) {
; AVX1-LABEL: 'broadcast_load_f64_v8f64'
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
; AVX1-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst
;
; AVX2-LABEL: 'broadcast_load_f64_v8f64'
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
; AVX2-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst
;
; AVX512-LABEL: 'broadcast_load_f64_v8f64'
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
; AVX512-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst
;
%load = load double, ptr %src
@@ -338,22 +338,22 @@ define <16 x i32> @broadcast_load_v4i32_v16i32(ptr %src) {
define <8 x i16> @broadcast_load_v8i16_v8i16(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v8i16_v8i16'
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v8i16_v8i16'
; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst
;
; SSE42-LABEL: 'broadcast_load_v8i16_v8i16'
; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst
;
; AVX1-LABEL: 'broadcast_load_v8i16_v8i16'
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst
;
; AVX2-LABEL: 'broadcast_load_v8i16_v8i16'
@@ -374,22 +374,22 @@ define <8 x i16> @broadcast_load_v8i16_v8i16(ptr %src) {
define <16 x i16> @broadcast_load_v8i16_v16i16(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v8i16_v16i16'
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v8i16_v16i16'
; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst
;
; SSE42-LABEL: 'broadcast_load_v8i16_v16i16'
; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst
;
; AVX1-LABEL: 'broadcast_load_v8i16_v16i16'
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst
;
; AVX2-LABEL: 'broadcast_load_v8i16_v16i16'
@@ -410,22 +410,22 @@ define <16 x i16> @broadcast_load_v8i16_v16i16(ptr %src) {
define <32 x i16> @broadcast_load_v8i16_v32i16(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v8i16_v32i16'
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v8i16_v32i16'
; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst
;
; SSE42-LABEL: 'broadcast_load_v8i16_v32i16'
; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst
;
; AVX1-LABEL: 'broadcast_load_v8i16_v32i16'
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst
;
; AVX2-LABEL: 'broadcast_load_v8i16_v32i16'
@@ -450,22 +450,22 @@ define <32 x i16> @broadcast_load_v8i16_v32i16(ptr %src) {
define <16 x i8> @broadcast_load_v16i8_v16i8(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v16i8_v16i8'
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v16i8_v16i8'
; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst
;
; SSE42-LABEL: 'broadcast_load_v16i8_v16i8'
; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst
;
; AVX1-LABEL: 'broadcast_load_v16i8_v16i8'
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst
;
; AVX2-LABEL: 'broadcast_load_v16i8_v16i8'
@@ -486,22 +486,22 @@ define <16 x i8> @broadcast_load_v16i8_v16i8(ptr %src) {
define <32 x i8> @broadcast_load_v16i8_v32i8(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v16i8_v32i8'
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v16i8_v32i8'
; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst
;
; SSE42-LABEL: 'broadcast_load_v16i8_v32i8'
; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst
;
; AVX1-LABEL: 'broadcast_load_v16i8_v32i8'
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst
;
; AVX2-LABEL: 'broadcast_load_v16i8_v32i8'
@@ -522,22 +522,22 @@ define <32 x i8> @broadcast_load_v16i8_v32i8(ptr %src) {
define <64 x i8> @broadcast_load_v16i8_v64i8(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v16i8_v64i8'
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v16i8_v64i8'
; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst
;
; SSE42-LABEL: 'broadcast_load_v16i8_v64i8'
; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst
;
; AVX1-LABEL: 'broadcast_load_v16i8_v64i8'
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst
;
; AVX2-LABEL: 'broadcast_load_v16i8_v64i8'
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
index 30316292aec41..aea7cc8f036be 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
@@ -2032,7 +2032,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; AVX1-LABEL: 'test_gather_16f32_const_mask2'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; AVX1-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2040,7 +2040,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; AVX2-LABEL: 'test_gather_16f32_const_mask2'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2048,7 +2048,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; SKL-LABEL: 'test_gather_16f32_const_mask2'
; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:24 SizeLat:24 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2056,7 +2056,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; AVX512-LABEL: 'test_gather_16f32_const_mask2'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2091,7 +2091,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; AVX1-LABEL: 'test_scatter_16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX1-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; AVX1-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
@@ -2099,7 +2099,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; AVX2-LABEL: 'test_scatter_16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX2-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; AVX2-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
@@ -2107,7 +2107,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; SKL-LABEL: 'test_scatter_16i32'
; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; SKL-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; SKL-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
@@ -2115,7 +2115,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; AVX512-LABEL: 'test_scatter_16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX512-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
index 8fae37b3b1cac..742b5b2ad62f9 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
@@ -2032,7 +2032,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; AVX1-LABEL: 'test_gather_16f32_const_mask2'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; AVX1-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2040,7 +2040,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; AVX2-LABEL: 'test_gather_16f32_const_mask2'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2048,7 +2048,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; SKL-LABEL: 'test_gather_16f32_const_mask2'
; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:24 SizeLat:24 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2056,7 +2056,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; AVX512-LABEL: 'test_gather_16f32_const_mask2'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2091,7 +2091,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; AVX1-LABEL: 'test_scatter_16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX1-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; AVX1-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
@@ -2099,7 +2099,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; AVX2-LABEL: 'test_scatter_16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX2-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; AVX2-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
@@ -2107,7 +2107,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; SKL-LABEL: 'test_scatter_16i32'
; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; SKL-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; SKL-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
@@ -2115,7 +2115,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; AVX512-LABEL: 'test_scatter_16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX512-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll
index a8ceebf8e28b6..c4cf8b79eb492 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll
@@ -3,11 +3,11 @@
define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128, <16 x half> %src256, <32 x half> %src512) {
; CHECK-LABEL: 'test_vXf16'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll
index 1a2d09834176d..79e16225be79a 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll
@@ -25,20 +25,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
;
; AVX1-LABEL: 'test_vXf64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
@@ -56,20 +56,20 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
;
; AVX1-LABEL: 'test_vXi64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -89,22 +89,22 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
; AVX1-LABEL: 'test_vXf32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer
@@ -125,22 +125,22 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
; AVX1-LABEL: 'test_vXi32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -154,41 +154,41 @@ define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128
; SSE2-LABEL: 'test_vXf16'
; SSE2-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXf16'
; SSSE3-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXf16'
; SSE42-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf16'
; AVX2-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf16'
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
@@ -244,49 +244,49 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE2-LABEL: 'test_vXi16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi16'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi16'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi16'
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
@@ -302,54 +302,54 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi8'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi8'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi8'
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
@@ -365,46 +365,46 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1>
; SSE2-LABEL: 'test_vXi1'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi1'
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi1'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi1'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi1'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi1'
@@ -440,22 +440,22 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a
; AVX1-LABEL: 'test_upper_vXf32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_upper_vXf32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_upper_vXf32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2>
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2>
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
index ccfb5437a75a6..8bb9cbc12ad81 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
@@ -32,21 +32,21 @@ define void @shuffle_load() {
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
-; SSE-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
-; SSE-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
-; SSE-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
; SSE-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
; SSE-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
-; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
-; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
-; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
; SSE-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
@@ -66,11 +66,11 @@ define void @shuffle_load() {
; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8
; SSE-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16
-; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32
-; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64
-; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8
; SSE-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16
@@ -95,21 +95,21 @@ define void @shuffle_load() {
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
; SSE2-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
; SSE2-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
@@ -129,11 +129,11 @@ define void @shuffle_load() {
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8
; SSE2-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8
; SSE2-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16
@@ -158,21 +158,21 @@ define void @shuffle_load() {
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
-; SSE3-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
-; SSE3-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
-; SSE3-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
; SSE3-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
; SSE3-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
-; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
-; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
-; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
; SSE3-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
@@ -192,11 +192,11 @@ define void @shuffle_load() {
; SSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8
; SSE3-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16
-; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32
-; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64
-; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8
; SSE3-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16
@@ -221,21 +221,21 @@ define void @shuffle_load() {
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
-; AVX-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
-; AVX-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
-; AVX-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
; AVX-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
; AVX-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
-; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
-; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
-; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
; AVX-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
@@ -255,11 +255,11 @@ define void @shuffle_load() {
; AVX-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8
; AVX-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16
-; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32
-; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64
-; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8
; AVX-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16
@@ -284,21 +284,21 @@ define void @shuffle_load() {
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
-; AVX2-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
-; AVX2-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
-; AVX2-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
; AVX2-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
; AVX2-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
; AVX2-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
@@ -318,11 +318,11 @@ define void @shuffle_load() {
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8
; AVX2-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8
; AVX2-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16
@@ -347,21 +347,21 @@ define void @shuffle_load() {
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
-; AVX512-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
-; AVX512-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
-; AVX512-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
; AVX512-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
; AVX512-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
-; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
-; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
-; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
; AVX512-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
@@ -381,11 +381,11 @@ define void @shuffle_load() {
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8
; AVX512-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16
-; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32
-; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64
-; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8
; AVX512-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll
index a7732920dd249..11f14df25dc62 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll
@@ -214,7 +214,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi16'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
; SSSE3-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
@@ -223,7 +223,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi16'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
; SSE42-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
@@ -232,7 +232,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXi16'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
@@ -241,7 +241,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
@@ -250,7 +250,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
@@ -259,7 +259,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'test_vXi16'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
@@ -268,7 +268,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'test_vXi16'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
@@ -277,7 +277,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI-LABEL: 'test_vXi16'
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
@@ -305,7 +305,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi8'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -314,7 +314,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi8'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -323,7 +323,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXi8'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -332,7 +332,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -341,7 +341,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -350,7 +350,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'test_vXi8'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -359,7 +359,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'test_vXi8'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -368,7 +368,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI-LABEL: 'test_vXi8'
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll
index 6d22546d2887a..c98efcd7fdbb7 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll
@@ -25,20 +25,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
;
; AVX1-LABEL: 'test_vXf64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
@@ -56,20 +56,20 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
;
; AVX1-LABEL: 'test_vXi64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
@@ -89,22 +89,22 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
; AVX1-LABEL: 'test_vXf32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
@@ -125,22 +125,22 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
; AVX1-LABEL: 'test_vXi32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -154,41 +154,41 @@ define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128
; SSE2-LABEL: 'test_vXf16'
; SSE2-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXf16'
; SSSE3-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
; SSSE3-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXf16'
; SSE42-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
; SSE42-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf16'
; AVX2-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf16'
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
@@ -244,49 +244,49 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE2-LABEL: 'test_vXi16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi16'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi16'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi16'
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
@@ -302,54 +302,54 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi8'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi8'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi8'
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
@@ -365,46 +365,46 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1>
; SSE2-LABEL: 'test_vXi1'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi1'
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1>
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi1'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1>
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi1'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi1'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi1'
@@ -440,22 +440,22 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a
; AVX1-LABEL: 'test_upper_vXf32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_upper_vXf32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_upper_vXf32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
index bd642791d00de..c53da908aa490 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
@@ -531,31 +531,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v4i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:7 SizeLat:13 for: %shift = ashr <4 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v4i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:9 for: %shift = ashr <4 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:7 SizeLat:13 for: %shift = ashr <4 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:9 for: %shift = ashr <4 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v4i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = ashr <4 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
@@ -574,31 +574,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:14 SizeLat:26 for: %shift = ashr <8 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:18 for: %shift = ashr <8 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:14 SizeLat:26 for: %shift = ashr <8 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:18 for: %shift = ashr <8 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = ashr <8 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
@@ -660,31 +660,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <8 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <8 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
@@ -703,31 +703,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <16 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <16 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <16 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <16 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
@@ -740,43 +740,43 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v8i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
@@ -789,43 +789,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = ashr <16 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = ashr <16 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <16 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <16 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
@@ -838,67 +838,67 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = ashr <32 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = ashr <32 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <32 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <32 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <32 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <32 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i16'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = ashr <32 x i16> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i16'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i16'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = ashr <32 x i16> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i16'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
@@ -911,67 +911,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:15 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:15 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:9 Lat:6 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %shift = ashr <16 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v16i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v16i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v16i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v16i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
@@ -984,67 +984,67 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:18 Lat:30 SizeLat:26 for: %shift = ashr <32 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:22 CodeSize:18 Lat:30 SizeLat:26 for: %shift = ashr <32 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:16 Lat:11 SizeLat:21 for: %shift = ashr <32 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:7 SizeLat:6 for: %shift = ashr <32 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
@@ -1057,67 +1057,67 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v64i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:44 CodeSize:36 Lat:60 SizeLat:52 for: %shift = ashr <64 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v64i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:44 CodeSize:36 Lat:60 SizeLat:52 for: %shift = ashr <64 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v64i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:32 Lat:22 SizeLat:42 for: %shift = ashr <64 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v64i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:18 SizeLat:32 for: %shift = ashr <64 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:14 SizeLat:12 for: %shift = ashr <64 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:18 SizeLat:32 for: %shift = ashr <64 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v64i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:37 CodeSize:51 Lat:37 SizeLat:63 for: %shift = ashr <64 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v64i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v64i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:37 CodeSize:51 Lat:37 SizeLat:63 for: %shift = ashr <64 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v64i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index fdf011ef4d3f9..7a06269ed1cde 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -531,31 +531,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v4i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:7 SizeLat:13 for: %shift = ashr <4 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v4i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:9 for: %shift = ashr <4 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:7 SizeLat:13 for: %shift = ashr <4 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:9 for: %shift = ashr <4 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v4i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = ashr <4 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
@@ -574,31 +574,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:14 SizeLat:26 for: %shift = ashr <8 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:18 for: %shift = ashr <8 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:14 SizeLat:26 for: %shift = ashr <8 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:18 for: %shift = ashr <8 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = ashr <8 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
@@ -660,31 +660,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <8 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <8 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
@@ -703,31 +703,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <16 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <16 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <16 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <16 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
@@ -740,43 +740,43 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v8i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
@@ -789,43 +789,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = ashr <16 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = ashr <16 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <16 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <16 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
@@ -838,67 +838,67 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = ashr <32 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = ashr <32 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <32 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <32 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <32 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <32 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i16'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = ashr <32 x i16> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i16'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i16'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = ashr <32 x i16> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i16'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
@@ -911,67 +911,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:15 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:15 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:9 Lat:6 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %shift = ashr <16 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v16i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v16i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v16i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v16i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
@@ -984,67 +984,67 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:18 Lat:30 SizeLat:26 for: %shift = ashr <32 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:22 CodeSize:18 Lat:30 SizeLat:26 for: %shift = ashr <32 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:16 Lat:11 SizeLat:21 for: %shift = ashr <32 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:7 SizeLat:6 for: %shift = ashr <32 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
@@ -1057,67 +1057,67 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v64i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:44 CodeSize:36 Lat:60 SizeLat:52 for: %shift = ashr <64 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v64i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:44 CodeSize:36 Lat:60 SizeLat:52 for: %shift = ashr <64 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v64i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:32 Lat:22 SizeLat:42 for: %shift = ashr <64 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v64i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:18 SizeLat:32 for: %shift = ashr <64 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:14 SizeLat:12 for: %shift = ashr <64 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:18 SizeLat:32 for: %shift = ashr <64 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v64i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:37 CodeSize:51 Lat:37 SizeLat:63 for: %shift = ashr <64 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v64i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v64i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:37 CodeSize:51 Lat:37 SizeLat:63 for: %shift = ashr <64 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v64i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
index 2f70a4bd4a39b..ee7546fa12a0b 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
@@ -527,31 +527,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v4i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = lshr <4 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v4i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = lshr <4 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v4i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
@@ -570,31 +570,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = lshr <8 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = lshr <8 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = lshr <8 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = lshr <8 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <8 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
@@ -656,31 +656,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <8 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <8 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
@@ -699,31 +699,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <16 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <16 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <16 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <16 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
@@ -736,43 +736,43 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v8i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
@@ -785,43 +785,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = lshr <16 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = lshr <16 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <16 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <16 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
@@ -834,67 +834,67 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = lshr <32 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = lshr <32 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <32 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <32 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <32 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <32 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i16'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = lshr <32 x i16> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i16'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i16'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = lshr <32 x i16> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i16'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
@@ -907,67 +907,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:5 Lat:13 SizeLat:9 for: %shift = lshr <16 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:5 Lat:13 SizeLat:9 for: %shift = lshr <16 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:8 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %shift = lshr <16 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v16i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v16i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v16i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v16i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
@@ -980,43 +980,43 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:10 Lat:26 SizeLat:18 for: %shift = lshr <32 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:10 Lat:26 SizeLat:18 for: %shift = lshr <32 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:9 SizeLat:14 for: %shift = lshr <32 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:7 SizeLat:6 for: %shift = lshr <32 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512-LABEL: 'splatvar_shift_v32i8'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
@@ -1029,67 +1029,67 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v64i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:20 Lat:52 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v64i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:20 Lat:52 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v64i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:18 SizeLat:28 for: %shift = lshr <64 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v64i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:14 Lat:16 SizeLat:18 for: %shift = lshr <64 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:14 SizeLat:12 for: %shift = lshr <64 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:14 Lat:16 SizeLat:18 for: %shift = lshr <64 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v64i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:30 Lat:19 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v64i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v64i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:30 Lat:19 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v64i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index 78fd9f33e797c..a508c2ae96035 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -527,31 +527,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v4i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = lshr <4 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v4i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = lshr <4 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v4i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
@@ -570,31 +570,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = lshr <8 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = lshr <8 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = lshr <8 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = lshr <8 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <8 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
@@ -656,31 +656,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <8 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <8 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
@@ -699,31 +699,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <16 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <16 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <16 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <16 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
@@ -736,43 +736,43 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v8i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
@@ -785,43 +785,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = lshr <16 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = lshr <16 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <16 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <16 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
@@ -834,67 +834,67 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = lshr <32 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = lshr <32 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <32 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <32 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <32 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <32 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i16'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = lshr <32 x i16> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i16'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i16'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = lshr <32 x i16> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i16'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
@@ -907,67 +907,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:5 Lat:13 SizeLat:9 for: %shift = lshr <16 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:5 Lat:13 SizeLat:9 for: %shift = lshr <16 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:8 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %shift = lshr <16 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v16i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v16i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v16i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v16i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
@@ -980,43 +980,43 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:10 Lat:26 SizeLat:18 for: %shift = lshr <32 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:10 Lat:26 SizeLat:18 for: %shift = lshr <32 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:9 SizeLat:14 for: %shift = lshr <32 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:7 SizeLat:6 for: %shift = lshr <32 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512-LABEL: 'splatvar_shift_v32i8'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
@@ -1029,67 +1029,67 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v64i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:20 Lat:52 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v64i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:20 Lat:52 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v64i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:18 SizeLat:28 for: %shift = lshr <64 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v64i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:14 Lat:16 SizeLat:18 for: %shift = lshr <64 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:14 SizeLat:12 for: %shift = lshr <64 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:14 Lat:16 SizeLat:18 for: %shift = lshr <64 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v64i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:30 Lat:19 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v64i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v64i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:30 Lat:19 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v64i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
index adedca8ce6926..375dac51ff7d0 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
@@ -567,31 +567,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v4i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = shl <4 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v4i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = shl <4 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v4i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
@@ -610,31 +610,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = shl <8 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = shl <8 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = shl <8 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = shl <8 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <8 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
@@ -696,31 +696,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <8 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <8 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
@@ -739,31 +739,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <16 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <16 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <16 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <16 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
@@ -776,49 +776,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v8i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SLM-LABEL: 'splatvar_shift_v8i16'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
@@ -831,49 +831,49 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <16 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <16 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SLM-LABEL: 'splatvar_shift_v16i16'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
@@ -886,73 +886,73 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <32 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <32 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <32 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <32 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i16'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = shl <32 x i16> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i16'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i16'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = shl <32 x i16> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SLM-LABEL: 'splatvar_shift_v32i16'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i16'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
@@ -965,49 +965,49 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:8 for: %shift = shl <16 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %shift = shl <16 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i8'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SLM-LABEL: 'splatvar_shift_v16i8'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
@@ -1020,49 +1020,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:11 Lat:8 SizeLat:14 for: %shift = shl <32 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %shift = shl <32 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512-LABEL: 'splatvar_shift_v32i8'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SLM-LABEL: 'splatvar_shift_v32i8'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
@@ -1075,73 +1075,73 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v64i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v64i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v64i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:22 Lat:16 SizeLat:28 for: %shift = shl <64 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v64i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:14 SizeLat:16 for: %shift = shl <64 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %shift = shl <64 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:14 SizeLat:16 for: %shift = shl <64 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v64i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:27 Lat:19 SizeLat:33 for: %shift = shl <64 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v64i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v64i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:27 Lat:19 SizeLat:33 for: %shift = shl <64 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SLM-LABEL: 'splatvar_shift_v64i8'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v64i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index a3536ecee0179..b2435be3d1c27 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -567,31 +567,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v4i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = shl <4 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v4i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = shl <4 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v4i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
@@ -610,31 +610,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = shl <8 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = shl <8 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = shl <8 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = shl <8 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <8 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
@@ -696,31 +696,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <8 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <8 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
@@ -739,31 +739,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <16 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <16 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <16 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <16 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
@@ -776,49 +776,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v8i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SLM-LABEL: 'splatvar_shift_v8i16'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
@@ -831,49 +831,49 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <16 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <16 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SLM-LABEL: 'splatvar_shift_v16i16'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
@@ -886,73 +886,73 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <32 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <32 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <32 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <32 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i16'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = shl <32 x i16> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i16'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i16'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = shl <32 x i16> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SLM-LABEL: 'splatvar_shift_v32i16'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i16'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
@@ -965,49 +965,49 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:8 for: %shift = shl <16 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %shift = shl <16 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i8'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SLM-LABEL: 'splatvar_shift_v16i8'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
@@ -1020,49 +1020,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:11 Lat:8 SizeLat:14 for: %shift = shl <32 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %shift = shl <32 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512-LABEL: 'splatvar_shift_v32i8'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SLM-LABEL: 'splatvar_shift_v32i8'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
@@ -1075,73 +1075,73 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v64i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v64i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v64i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:22 Lat:16 SizeLat:28 for: %shift = shl <64 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v64i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:14 SizeLat:16 for: %shift = shl <64 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %shift = shl <64 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:14 SizeLat:16 for: %shift = shl <64 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v64i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:27 Lat:19 SizeLat:33 for: %shift = shl <64 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v64i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v64i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:27 Lat:19 SizeLat:33 for: %shift = shl <64 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SLM-LABEL: 'splatvar_shift_v64i8'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v64i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
index 049bb2e5073fe..c4d133aa3b6ed 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
@@ -18,7 +18,7 @@ define void @fextr(ptr %ptr) {
; YAML-NEXT: Function: fextr
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
-; YAML-NEXT: - Cost: '-20'
+; YAML-NEXT: - Cost: '-21'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '4'
More information about the llvm-commits
mailing list