[llvm] 68ef68f - [CostModel][X86] Improve accuracy of vXi8/vXi16 vector non-uniform shift costs on AVX2/AVX512 targets

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue May 25 03:38:19 PDT 2021


Author: Simon Pilgrim
Date: 2021-05-25T11:35:57+01:00
New Revision: 68ef68f8ac5cad1fdcd9c7b0e2a8f134d9f595ae

URL: https://github.com/llvm/llvm-project/commit/68ef68f8ac5cad1fdcd9c7b0e2a8f134d9f595ae
DIFF: https://github.com/llvm/llvm-project/commit/68ef68f8ac5cad1fdcd9c7b0e2a8f134d9f595ae.diff

LOG: [CostModel][X86] Improve accuracy of vXi8/vXi16 vector non-uniform shift costs on AVX2/AVX512 targets

Determined from llvm-mca analysis, AVX2+ capable targets have a higher throughput for VPBLENDVB and VPMOVZX ops, making it cheaper to perform shift+select patterns for vXi8 shifts or extend/shift/truncate for vXi16 shifts. Similarly AVX512BW can perform vXi8 as extend/shift/truncate patterns.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/div.ll
    llvm/test/Analysis/CostModel/X86/fshl.ll
    llvm/test/Analysis/CostModel/X86/fshr.ll
    llvm/test/Analysis/CostModel/X86/rem.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index b37bd71e17c6d..a731a64060333 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -507,14 +507,22 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
   }
 
   static const CostTblEntry AVX512BWShiftCostTable[] = {
+    { ISD::SHL,   MVT::v16i8,      4 }, // extend/vpsllvw/pack sequence.
+    { ISD::SRL,   MVT::v16i8,      4 }, // extend/vpsrlvw/pack sequence.
+    { ISD::SRA,   MVT::v16i8,      4 }, // extend/vpsravw/pack sequence.
+    { ISD::SHL,   MVT::v32i8,      4 }, // extend/vpsllvw/pack sequence.
+    { ISD::SRL,   MVT::v32i8,      4 }, // extend/vpsrlvw/pack sequence.
+    { ISD::SRA,   MVT::v32i8,      6 }, // extend/vpsravw/pack sequence.
+    { ISD::SHL,   MVT::v64i8,      6 }, // extend/vpsllvw/pack sequence.
+    { ISD::SRL,   MVT::v64i8,      7 }, // extend/vpsrlvw/pack sequence.
+    { ISD::SRA,   MVT::v64i8,     15 }, // extend/vpsravw/pack sequence.
+
     { ISD::SHL,   MVT::v8i16,      1 }, // vpsllvw
     { ISD::SRL,   MVT::v8i16,      1 }, // vpsrlvw
     { ISD::SRA,   MVT::v8i16,      1 }, // vpsravw
-
     { ISD::SHL,   MVT::v16i16,     1 }, // vpsllvw
     { ISD::SRL,   MVT::v16i16,     1 }, // vpsrlvw
     { ISD::SRA,   MVT::v16i16,     1 }, // vpsravw
-
     { ISD::SHL,   MVT::v32i16,     1 }, // vpsllvw
     { ISD::SRL,   MVT::v32i16,     1 }, // vpsrlvw
     { ISD::SRA,   MVT::v32i16,     1 }, // vpsravw
@@ -760,20 +768,26 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
   }
 
   static const CostTblEntry AVX2CostTable[] = {
-    { ISD::SHL,  MVT::v32i8,     11 }, // vpblendvb sequence.
-    { ISD::SHL,  MVT::v64i8,     22 }, // 2*vpblendvb sequence.
-    { ISD::SHL,  MVT::v16i16,    10 }, // extend/vpsrlvd/pack sequence.
-    { ISD::SHL,  MVT::v32i16,    20 }, // 2*extend/vpsrlvd/pack sequence.
-
-    { ISD::SRL,  MVT::v32i8,     11 }, // vpblendvb sequence.
-    { ISD::SRL,  MVT::v64i8,     22 }, // 2*vpblendvb sequence.
-    { ISD::SRL,  MVT::v16i16,    10 }, // extend/vpsrlvd/pack sequence.
-    { ISD::SRL,  MVT::v32i16,    20 }, // 2*extend/vpsrlvd/pack sequence.
-
-    { ISD::SRA,  MVT::v32i8,     24 }, // vpblendvb sequence.
-    { ISD::SRA,  MVT::v64i8,     48 }, // 2*vpblendvb sequence.
-    { ISD::SRA,  MVT::v16i16,    10 }, // extend/vpsravd/pack sequence.
-    { ISD::SRA,  MVT::v32i16,    20 }, // 2*extend/vpsravd/pack sequence.
+    { ISD::SHL,  MVT::v16i8,      6 }, // vpblendvb sequence.
+    { ISD::SHL,  MVT::v32i8,      6 }, // vpblendvb sequence.
+    { ISD::SHL,  MVT::v64i8,     12 }, // 2*vpblendvb sequence.
+    { ISD::SHL,  MVT::v8i16,      5 }, // extend/vpsrlvd/pack sequence.
+    { ISD::SHL,  MVT::v16i16,     7 }, // extend/vpsrlvd/pack sequence.
+    { ISD::SHL,  MVT::v32i16,    14 }, // 2*extend/vpsrlvd/pack sequence.
+
+    { ISD::SRL,  MVT::v16i8,      6 }, // vpblendvb sequence.
+    { ISD::SRL,  MVT::v32i8,      6 }, // vpblendvb sequence.
+    { ISD::SRL,  MVT::v64i8,     12 }, // 2*vpblendvb sequence.
+    { ISD::SRL,  MVT::v8i16,      5 }, // extend/vpsrlvd/pack sequence.
+    { ISD::SRL,  MVT::v16i16,     7 }, // extend/vpsrlvd/pack sequence.
+    { ISD::SRL,  MVT::v32i16,    14 }, // 2*extend/vpsrlvd/pack sequence.
+
+    { ISD::SRA,  MVT::v16i8,     17 }, // vpblendvb sequence.
+    { ISD::SRA,  MVT::v32i8,     17 }, // vpblendvb sequence.
+    { ISD::SRA,  MVT::v64i8,     34 }, // 2*vpblendvb sequence.
+    { ISD::SRA,  MVT::v8i16,      5 }, // extend/vpsravd/pack sequence.
+    { ISD::SRA,  MVT::v16i16,     7 }, // extend/vpsravd/pack sequence.
+    { ISD::SRA,  MVT::v32i16,    14 }, // 2*extend/vpsravd/pack sequence.
     { ISD::SRA,  MVT::v2i64,      4 }, // srl/xor/sub sequence.
     { ISD::SRA,  MVT::v4i64,      4 }, // srl/xor/sub sequence.
 

diff  --git a/llvm/test/Analysis/CostModel/X86/div.ll b/llvm/test/Analysis/CostModel/X86/div.ll
index 84304b27b0aa0..487264e384b0a 100644
--- a/llvm/test/Analysis/CostModel/X86/div.ll
+++ b/llvm/test/Analysis/CostModel/X86/div.ll
@@ -793,13 +793,13 @@ define i32 @sdiv_constpow2() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'sdiv_constpow2'
@@ -812,13 +812,13 @@ define i32 @sdiv_constpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'sdiv_constpow2'
@@ -835,9 +835,9 @@ define i32 @sdiv_constpow2() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'sdiv_constpow2'
@@ -988,13 +988,13 @@ define i32 @udiv_constpow2() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'udiv_constpow2'
@@ -1007,13 +1007,13 @@ define i32 @udiv_constpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'udiv_constpow2'
@@ -1030,9 +1030,9 @@ define i32 @udiv_constpow2() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'udiv_constpow2'

diff  --git a/llvm/test/Analysis/CostModel/X86/fshl.ll b/llvm/test/Analysis/CostModel/X86/fshl.ll
index a82d676ae6ab8..f961422193656 100644
--- a/llvm/test/Analysis/CostModel/X86/fshl.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshl.ll
@@ -164,16 +164,16 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i
 ;
 ; AVX2-LABEL: 'var_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'var_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'var_funnel_i16'
@@ -185,9 +185,9 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i
 ;
 ; AVX512DQ-LABEL: 'var_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'var_funnel_i16'
@@ -242,30 +242,30 @@ define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %
 ;
 ; AVX2-LABEL: 'var_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'var_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'var_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'var_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'var_funnel_i8'
@@ -594,36 +594,36 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatvar_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatvar_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatvar_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_funnel_i8'
@@ -820,16 +820,16 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ;
 ; AVX2-LABEL: 'constant_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'constant_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'constant_funnel_i16'
@@ -841,9 +841,9 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ;
 ; AVX512DQ-LABEL: 'constant_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'constant_funnel_i16'
@@ -898,30 +898,30 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ;
 ; AVX2-LABEL: 'constant_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'constant_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'constant_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'constant_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'constant_funnel_i8'
@@ -1368,16 +1368,16 @@ define void @var_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i
 ;
 ; AVX2-LABEL: 'var_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 %c16)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'var_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 %c16)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'var_rotate_i16'
@@ -1389,9 +1389,9 @@ define void @var_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i
 ;
 ; AVX512DQ-LABEL: 'var_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 %c16)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'var_rotate_i16'
@@ -1446,30 +1446,30 @@ define void @var_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %
 ;
 ; AVX2-LABEL: 'var_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 %c8)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'var_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 %c8)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'var_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 %c8)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'var_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 %c8)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'var_rotate_i8'
@@ -1771,36 +1771,36 @@ define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatvar_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatvar_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatvar_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_rotate_i8'
@@ -1990,16 +1990,16 @@ define void @constant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ;
 ; AVX2-LABEL: 'constant_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'constant_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'constant_rotate_i16'
@@ -2011,9 +2011,9 @@ define void @constant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ;
 ; AVX512DQ-LABEL: 'constant_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'constant_rotate_i16'
@@ -2068,30 +2068,30 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ;
 ; AVX2-LABEL: 'constant_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'constant_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'constant_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'constant_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'constant_rotate_i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll
index 3e838ba36eaca..1712f26f4dfa0 100644
--- a/llvm/test/Analysis/CostModel/X86/fshr.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshr.ll
@@ -164,16 +164,16 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i
 ;
 ; AVX2-LABEL: 'var_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'var_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'var_funnel_i16'
@@ -185,9 +185,9 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i
 ;
 ; AVX512DQ-LABEL: 'var_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'var_funnel_i16'
@@ -242,30 +242,30 @@ define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %
 ;
 ; AVX2-LABEL: 'var_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'var_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'var_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'var_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'var_funnel_i8'
@@ -594,36 +594,36 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatvar_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatvar_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatvar_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_funnel_i8'
@@ -820,16 +820,16 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ;
 ; AVX2-LABEL: 'constant_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'constant_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'constant_funnel_i16'
@@ -841,9 +841,9 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ;
 ; AVX512DQ-LABEL: 'constant_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'constant_funnel_i16'
@@ -898,30 +898,30 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ;
 ; AVX2-LABEL: 'constant_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'constant_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'constant_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'constant_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'constant_funnel_i8'
@@ -1369,16 +1369,16 @@ define void @var_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i
 ;
 ; AVX2-LABEL: 'var_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'var_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'var_rotate_i16'
@@ -1390,9 +1390,9 @@ define void @var_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i
 ;
 ; AVX512DQ-LABEL: 'var_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'var_rotate_i16'
@@ -1447,30 +1447,30 @@ define void @var_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %
 ;
 ; AVX2-LABEL: 'var_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'var_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'var_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'var_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'var_rotate_i8'
@@ -1772,36 +1772,36 @@ define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatvar_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatvar_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatvar_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_rotate_i8'
@@ -1991,16 +1991,16 @@ define void @constant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ;
 ; AVX2-LABEL: 'constant_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'constant_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'constant_rotate_i16'
@@ -2012,9 +2012,9 @@ define void @constant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ;
 ; AVX512DQ-LABEL: 'constant_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'constant_rotate_i16'
@@ -2069,30 +2069,30 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ;
 ; AVX2-LABEL: 'constant_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'constant_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'constant_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'constant_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'constant_rotate_i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/rem.ll b/llvm/test/Analysis/CostModel/X86/rem.ll
index 9dcca434f5532..640292450fb48 100644
--- a/llvm/test/Analysis/CostModel/X86/rem.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem.ll
@@ -812,13 +812,13 @@ define i32 @srem_constpow2() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = srem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V8i16 = srem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8i16 = srem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i16 = srem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = srem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %V32i8 = srem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %V64i8 = srem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V64i8 = srem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_constpow2'
@@ -831,13 +831,13 @@ define i32 @srem_constpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V8i16 = srem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8i16 = srem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i16 = srem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = srem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V32i8 = srem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %V64i8 = srem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V32i8 = srem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V64i8 = srem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_constpow2'
@@ -854,9 +854,9 @@ define i32 @srem_constpow2() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V32i8 = srem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %V64i8 = srem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V64i8 = srem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'srem_constpow2'

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
index bb116ebc25d50..6f69bac80a2eb 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
@@ -225,16 +225,20 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
-; AVX-LABEL: 'var_shift_v8i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+; AVX1-LABEL: 'var_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'var_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v8i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v8i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v8i16'
@@ -242,12 +246,16 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v8i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v8i16'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %b
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; BTVER2-LABEL: 'var_shift_v8i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = ashr <8 x i16> %a, %b
   ret <8 x i16> %shift
@@ -267,7 +275,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v16i16'
@@ -275,7 +283,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v16i16'
@@ -283,7 +291,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v16i16'
@@ -312,7 +320,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v32i16'
@@ -320,7 +328,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v32i16'
@@ -328,7 +336,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v32i16'
@@ -352,17 +360,37 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'var_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'var_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'var_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'var_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'var_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'var_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'var_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'var_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'var_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, %b
   ret <16 x i8> %shift
@@ -382,16 +410,28 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v32i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'var_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %b
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'var_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'var_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'var_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'var_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'var_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, %b
@@ -415,7 +455,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v64i8'
@@ -423,19 +463,19 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %b
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %b
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'var_shift_v64i8'
@@ -922,11 +962,17 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatvar_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
@@ -934,11 +980,35 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatvar_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -968,7 +1038,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -983,11 +1053,29 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
@@ -1023,7 +1111,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -1041,25 +1129,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512F-LABEL: 'splatvar_shift_v64i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
@@ -1281,16 +1369,20 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
-; AVX-LABEL: 'constant_shift_v8i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+; AVX1-LABEL: 'constant_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'constant_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v8i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v8i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v8i16'
@@ -1298,12 +1390,16 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v8i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v8i16'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; BTVER2-LABEL: 'constant_shift_v8i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
   ret <8 x i16> %shift
@@ -1323,7 +1419,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v16i16'
@@ -1331,7 +1427,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v16i16'
@@ -1339,7 +1435,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v16i16'
@@ -1368,7 +1464,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v32i16'
@@ -1376,7 +1472,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v32i16'
@@ -1384,7 +1480,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v32i16'
@@ -1408,17 +1504,37 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'constant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'constant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'constant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'constant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'constant_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'constant_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'constant_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'constant_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'constant_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <16 x i8> %shift
@@ -1438,16 +1554,28 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v32i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'constant_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'constant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'constant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'constant_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'constant_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'constant_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
@@ -1471,7 +1599,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v64i8'
@@ -1479,19 +1607,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'constant_shift_v64i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index 2fa4217e0fa76..3199b1724e4d9 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -225,16 +225,20 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
-; AVX-LABEL: 'var_shift_v8i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+; AVX1-LABEL: 'var_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'var_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v8i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v8i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v8i16'
@@ -242,12 +246,16 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v8i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v8i16'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %b
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; BTVER2-LABEL: 'var_shift_v8i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = ashr <8 x i16> %a, %b
   ret <8 x i16> %shift
@@ -267,7 +275,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v16i16'
@@ -275,7 +283,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v16i16'
@@ -283,7 +291,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v16i16'
@@ -312,7 +320,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v32i16'
@@ -320,7 +328,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v32i16'
@@ -328,7 +336,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v32i16'
@@ -352,17 +360,37 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'var_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'var_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'var_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'var_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'var_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'var_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'var_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'var_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'var_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, %b
   ret <16 x i8> %shift
@@ -382,16 +410,28 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v32i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'var_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %b
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'var_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'var_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'var_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'var_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'var_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, %b
@@ -415,7 +455,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v64i8'
@@ -423,19 +463,19 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %b
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %b
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'var_shift_v64i8'
@@ -922,11 +962,17 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatvar_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
@@ -934,11 +980,35 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatvar_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -968,7 +1038,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -983,11 +1053,29 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
@@ -1023,7 +1111,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -1041,25 +1129,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512F-LABEL: 'splatvar_shift_v64i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
@@ -1281,16 +1369,20 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
-; AVX-LABEL: 'constant_shift_v8i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+; AVX1-LABEL: 'constant_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'constant_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v8i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v8i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v8i16'
@@ -1298,12 +1390,16 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v8i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v8i16'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; BTVER2-LABEL: 'constant_shift_v8i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
   ret <8 x i16> %shift
@@ -1323,7 +1419,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v16i16'
@@ -1331,7 +1427,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v16i16'
@@ -1339,7 +1435,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v16i16'
@@ -1368,7 +1464,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v32i16'
@@ -1376,7 +1472,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v32i16'
@@ -1384,7 +1480,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v32i16'
@@ -1408,17 +1504,37 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'constant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'constant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'constant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'constant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'constant_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'constant_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'constant_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'constant_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'constant_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <16 x i8> %shift
@@ -1438,16 +1554,28 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v32i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'constant_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'constant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'constant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'constant_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'constant_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'constant_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
@@ -1471,7 +1599,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v64i8'
@@ -1479,19 +1607,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'constant_shift_v64i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
index cae53b878a53c..2ae0e7a4100bc 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
@@ -233,16 +233,20 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
-; AVX-LABEL: 'var_shift_v8i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+; AVX1-LABEL: 'var_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'var_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v8i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v8i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v8i16'
@@ -250,12 +254,16 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v8i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v8i16'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %b
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; BTVER2-LABEL: 'var_shift_v8i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = lshr <8 x i16> %a, %b
   ret <8 x i16> %shift
@@ -275,7 +283,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v16i16'
@@ -283,7 +291,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v16i16'
@@ -291,7 +299,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v16i16'
@@ -320,7 +328,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v32i16'
@@ -328,7 +336,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v32i16'
@@ -336,7 +344,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v32i16'
@@ -360,17 +368,37 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'var_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'var_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'var_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'var_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'var_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'var_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'var_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'var_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'var_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, %b
   ret <16 x i8> %shift
@@ -390,16 +418,28 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v32i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'var_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %b
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'var_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'var_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'var_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'var_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'var_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %b
@@ -423,7 +463,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v64i8'
@@ -431,19 +471,19 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %b
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %b
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'var_shift_v64i8'
@@ -930,11 +970,17 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatvar_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
@@ -942,11 +988,35 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatvar_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -976,7 +1046,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -991,11 +1061,29 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
@@ -1031,7 +1119,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -1049,25 +1137,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512F-LABEL: 'splatvar_shift_v64i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
@@ -1297,16 +1385,20 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
-; AVX-LABEL: 'constant_shift_v8i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+; AVX1-LABEL: 'constant_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'constant_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v8i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v8i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v8i16'
@@ -1314,12 +1406,16 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v8i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v8i16'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; BTVER2-LABEL: 'constant_shift_v8i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
   ret <8 x i16> %shift
@@ -1339,7 +1435,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v16i16'
@@ -1347,7 +1443,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v16i16'
@@ -1355,7 +1451,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v16i16'
@@ -1384,7 +1480,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v32i16'
@@ -1392,7 +1488,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v32i16'
@@ -1400,7 +1496,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v32i16'
@@ -1424,17 +1520,37 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'constant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'constant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'constant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'constant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'constant_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'constant_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'constant_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'constant_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'constant_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <16 x i8> %shift
@@ -1454,16 +1570,28 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v32i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'constant_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'constant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'constant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'constant_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'constant_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'constant_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
@@ -1487,7 +1615,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v64i8'
@@ -1495,19 +1623,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'constant_shift_v64i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index 7f61ee8b7bd91..f0845bbdeb8eb 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -233,16 +233,20 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
-; AVX-LABEL: 'var_shift_v8i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+; AVX1-LABEL: 'var_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'var_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v8i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v8i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v8i16'
@@ -250,12 +254,16 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v8i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v8i16'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %b
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; BTVER2-LABEL: 'var_shift_v8i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = lshr <8 x i16> %a, %b
   ret <8 x i16> %shift
@@ -275,7 +283,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v16i16'
@@ -283,7 +291,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v16i16'
@@ -291,7 +299,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v16i16'
@@ -320,7 +328,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v32i16'
@@ -328,7 +336,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v32i16'
@@ -336,7 +344,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v32i16'
@@ -360,17 +368,37 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'var_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'var_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'var_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'var_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'var_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'var_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'var_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'var_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'var_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, %b
   ret <16 x i8> %shift
@@ -390,16 +418,28 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v32i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'var_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %b
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'var_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'var_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'var_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'var_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'var_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %b
@@ -423,7 +463,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v64i8'
@@ -431,19 +471,19 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %b
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %b
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'var_shift_v64i8'
@@ -930,11 +970,17 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatvar_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
@@ -942,11 +988,35 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatvar_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -976,7 +1046,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -991,11 +1061,29 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
@@ -1031,7 +1119,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -1049,25 +1137,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512F-LABEL: 'splatvar_shift_v64i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
@@ -1297,16 +1385,20 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
-; AVX-LABEL: 'constant_shift_v8i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+; AVX1-LABEL: 'constant_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'constant_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v8i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v8i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v8i16'
@@ -1314,12 +1406,16 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v8i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v8i16'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; BTVER2-LABEL: 'constant_shift_v8i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
   ret <8 x i16> %shift
@@ -1339,7 +1435,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v16i16'
@@ -1347,7 +1443,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v16i16'
@@ -1355,7 +1451,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v16i16'
@@ -1384,7 +1480,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v32i16'
@@ -1392,7 +1488,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v32i16'
@@ -1400,7 +1496,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v32i16'
@@ -1424,17 +1520,37 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'constant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'constant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'constant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'constant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'constant_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'constant_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'constant_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'constant_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'constant_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <16 x i8> %shift
@@ -1454,16 +1570,28 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v32i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'constant_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'constant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'constant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'constant_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'constant_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'constant_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
@@ -1487,7 +1615,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v64i8'
@@ -1495,19 +1623,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'constant_shift_v64i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
index 84011e14cbad7..46951d26350f9 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
@@ -233,16 +233,20 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
-; AVX-LABEL: 'var_shift_v8i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+; AVX1-LABEL: 'var_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'var_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v8i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v8i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v8i16'
@@ -250,12 +254,16 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v8i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v8i16'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %b
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; BTVER2-LABEL: 'var_shift_v8i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = shl <8 x i16> %a, %b
   ret <8 x i16> %shift
@@ -275,7 +283,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v16i16'
@@ -283,7 +291,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v16i16'
@@ -291,7 +299,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v16i16'
@@ -320,7 +328,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v32i16'
@@ -328,7 +336,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v32i16'
@@ -336,7 +344,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v32i16'
@@ -360,17 +368,37 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'var_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'var_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'var_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'var_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'var_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'var_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'var_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'var_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'var_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, %b
   ret <16 x i8> %shift
@@ -390,16 +418,28 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v32i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'var_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %b
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'var_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'var_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'var_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'var_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'var_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, %b
@@ -423,7 +463,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v64i8'
@@ -431,19 +471,19 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %b
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %b
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'var_shift_v64i8'
@@ -930,11 +970,17 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatvar_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
@@ -942,11 +988,35 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatvar_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -976,7 +1046,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -991,11 +1061,29 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
@@ -1031,7 +1119,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -1049,25 +1137,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512F-LABEL: 'splatvar_shift_v64i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
@@ -1376,17 +1464,37 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'constant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'constant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'constant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'constant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'constant_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'constant_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'constant_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'constant_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'constant_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <16 x i8> %shift
@@ -1406,16 +1514,28 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v32i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'constant_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'constant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'constant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'constant_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'constant_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'constant_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
@@ -1439,7 +1559,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v64i8'
@@ -1447,19 +1567,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'constant_shift_v64i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index 6f3b01a25dbe8..2c5334b960ea2 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -233,16 +233,20 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
-; AVX-LABEL: 'var_shift_v8i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+; AVX1-LABEL: 'var_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'var_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v8i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v8i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v8i16'
@@ -250,12 +254,16 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v8i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v8i16'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %b
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
+;
+; BTVER2-LABEL: 'var_shift_v8i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = shl <8 x i16> %a, %b
   ret <8 x i16> %shift
@@ -275,7 +283,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v16i16'
@@ -283,7 +291,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v16i16'
@@ -291,7 +299,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v16i16'
@@ -320,7 +328,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOP-LABEL: 'var_shift_v32i16'
@@ -328,7 +336,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v32i16'
@@ -336,7 +344,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v32i16'
@@ -360,17 +368,37 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'var_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'var_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'var_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'var_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'var_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'var_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'var_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'var_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'var_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, %b
   ret <16 x i8> %shift
@@ -390,16 +418,28 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v32i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %b
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'var_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %b
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'var_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'var_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'var_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'var_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'var_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, %b
@@ -423,7 +463,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOP-LABEL: 'var_shift_v64i8'
@@ -431,19 +471,19 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'var_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %b
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'var_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %b
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %b
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'var_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %b
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'var_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %b
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %b
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'var_shift_v64i8'
@@ -930,11 +970,17 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatvar_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
@@ -942,11 +988,35 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatvar_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -976,7 +1046,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -991,11 +1061,29 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
@@ -1031,7 +1119,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -1049,25 +1137,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512F-LABEL: 'splatvar_shift_v64i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
@@ -1376,17 +1464,37 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'constant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'constant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'constant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'constant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX512F-LABEL: 'constant_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'constant_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'constant_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'constant_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'constant_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <16 x i8> %shift
@@ -1406,16 +1514,28 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v32i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'constant_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'constant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'constant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'constant_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'constant_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'constant_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
@@ -1439,7 +1559,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOP-LABEL: 'constant_shift_v64i8'
@@ -1447,19 +1567,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'constant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'constant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'constant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'constant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'constant_shift_v64i8'


        


More information about the llvm-commits mailing list