[llvm] 0ec028f - [CostModel][X86] Add CostKinds handling for vector shift by uniform/constuniform ops

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 15 06:05:50 PDT 2022


Author: Simon Pilgrim
Date: 2022-09-15T14:05:30+01:00
New Revision: 0ec028fe105b9bcdda4196cc8803205a1577c12a

URL: https://github.com/llvm/llvm-project/commit/0ec028fe105b9bcdda4196cc8803205a1577c12a
DIFF: https://github.com/llvm/llvm-project/commit/0ec028fe105b9bcdda4196cc8803205a1577c12a.diff

LOG: [CostModel][X86] Add CostKinds handling for vector shift by uniform/constuniform ops

Vector shift by const uniform is the cheapest shift instruction we have, non-const uniform have a marginally higher cost - some targets 'splat' the amount internally to use the shift-per-element instruction, others see a higher cost for the explicit zeroing of the upper bits for the (64-bit) shift amount.

This was achieved with an updated version of the 'cost-tables vs llvm-mca' script D103695 (I'll update the patch soon for reference)

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/arith-fix.ll
    llvm/test/Analysis/CostModel/X86/arith-overflow.ll
    llvm/test/Analysis/CostModel/X86/div-codesize.ll
    llvm/test/Analysis/CostModel/X86/div-latency.ll
    llvm/test/Analysis/CostModel/X86/div-sizelatency.ll
    llvm/test/Analysis/CostModel/X86/div.ll
    llvm/test/Analysis/CostModel/X86/fshl.ll
    llvm/test/Analysis/CostModel/X86/fshr.ll
    llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
    llvm/test/Analysis/CostModel/X86/mul.ll
    llvm/test/Analysis/CostModel/X86/rem-codesize.ll
    llvm/test/Analysis/CostModel/X86/rem-latency.ll
    llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll
    llvm/test/Analysis/CostModel/X86/rem.ll
    llvm/test/Analysis/CostModel/X86/uniformshift-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/uniformshift.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll
    llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index d7e00a1ae8dbc..8e2c801559f1a 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -329,16 +329,22 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
   }
 
   static const CostKindTblEntry AVX512BWUniformConstCostTable[] = {
-    { ISD::SHL,  MVT::v64i8,  { 2 } }, // psllw + pand.
-    { ISD::SRL,  MVT::v64i8,  { 2 } }, // psrlw + pand.
-    { ISD::SRA,  MVT::v64i8,  { 4 } }, // psrlw, pand, pxor, psubb.
-
-    { ISD::SHL,  MVT::v16i16, { 1 } }, // psllw
-    { ISD::SRL,  MVT::v16i16, { 1 } }, // psrlw
-    { ISD::SRA,  MVT::v16i16, { 1 } }, // psrlw
-    { ISD::SHL,  MVT::v32i16, { 1 } }, // psllw
-    { ISD::SRL,  MVT::v32i16, { 1 } }, // psrlw
-    { ISD::SRA,  MVT::v32i16, { 1 } }, // psrlw
+    { ISD::SHL,  MVT::v16i8,  { 1, 7, 2, 3 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v16i8,  { 1, 7, 2, 3 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v16i8,  { 1, 8, 4, 5 } }, // psrlw, pand, pxor, psubb.
+    { ISD::SHL,  MVT::v32i8,  { 1, 8, 2, 3 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v32i8,  { 1, 8, 2, 3 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v32i8,  { 1, 9, 4, 5 } }, // psrlw, pand, pxor, psubb.
+    { ISD::SHL,  MVT::v64i8,  { 1, 8, 2, 3 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v64i8,  { 1, 8, 2, 3 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v64i8,  { 1, 9, 4, 6 } }, // psrlw, pand, pxor, psubb.
+
+    { ISD::SHL,  MVT::v16i16, { 1, 1, 1, 1 } }, // psllw
+    { ISD::SRL,  MVT::v16i16, { 1, 1, 1, 1 } }, // psrlw
+    { ISD::SRA,  MVT::v16i16, { 1, 1, 1, 1 } }, // psrlw
+    { ISD::SHL,  MVT::v32i16, { 1, 1, 1, 1 } }, // psllw
+    { ISD::SRL,  MVT::v32i16, { 1, 1, 1, 1 } }, // psrlw
+    { ISD::SRA,  MVT::v32i16, { 1, 1, 1, 1 } }, // psrlw
   };
 
   if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasBWI())
@@ -348,18 +354,33 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
         return LT.first * KindCost.value();
 
   static const CostKindTblEntry AVX512UniformConstCostTable[] = {
-    { ISD::SRA,  MVT::v2i64,  { 1 } },
-    { ISD::SRA,  MVT::v4i64,  { 1 } },
-    { ISD::SRA,  MVT::v8i64,  { 1 } },
-
-    { ISD::SHL,  MVT::v64i8,  { 4 } }, // psllw + pand.
-    { ISD::SRL,  MVT::v64i8,  { 4 } }, // psrlw + pand.
-    { ISD::SRA,  MVT::v64i8,  { 8 } }, // psrlw, pand, pxor, psubb.
-
-    { ISD::SDIV, MVT::v16i32, { 6 } }, // pmuludq sequence
-    { ISD::SREM, MVT::v16i32, { 8 } }, // pmuludq+mul+sub sequence
-    { ISD::UDIV, MVT::v16i32, { 5 } }, // pmuludq sequence
-    { ISD::UREM, MVT::v16i32, { 7 } }, // pmuludq+mul+sub sequence
+    { ISD::SHL,  MVT::v64i8,  {  2, 12,  5,  6 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v64i8,  {  2, 12,  5,  6 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v64i8,  {  3, 10, 12, 12 } }, // psrlw, pand, pxor, psubb.
+
+    { ISD::SHL,  MVT::v16i16, {  2,  7,  4,  4 } }, // psllw + split.
+    { ISD::SRL,  MVT::v16i16, {  2,  7,  4,  4 } }, // psrlw + split.
+    { ISD::SRA,  MVT::v16i16, {  2,  7,  4,  4 } }, // psraw + split.
+
+    { ISD::SHL,  MVT::v8i32,  {  1,  1,  1,  1 } }, // pslld
+    { ISD::SRL,  MVT::v8i32,  {  1,  1,  1,  1 } }, // psrld
+    { ISD::SRA,  MVT::v8i32,  {  1,  1,  1,  1 } }, // psrad
+    { ISD::SHL,  MVT::v16i32, {  1,  1,  1,  1 } }, // pslld
+    { ISD::SRL,  MVT::v16i32, {  1,  1,  1,  1 } }, // psrld
+    { ISD::SRA,  MVT::v16i32, {  1,  1,  1,  1 } }, // psrad
+
+    { ISD::SRA,  MVT::v2i64,  {  1,  1,  1,  1 } }, // psraq
+    { ISD::SHL,  MVT::v4i64,  {  1,  1,  1,  1 } }, // psllq
+    { ISD::SRL,  MVT::v4i64,  {  1,  1,  1,  1 } }, // psrlq
+    { ISD::SRA,  MVT::v4i64,  {  1,  1,  1,  1 } }, // psraq
+    { ISD::SHL,  MVT::v8i64,  {  1,  1,  1,  1 } }, // psllq
+    { ISD::SRL,  MVT::v8i64,  {  1,  1,  1,  1 } }, // psrlq
+    { ISD::SRA,  MVT::v8i64,  {  1,  1,  1,  1 } }, // psraq
+
+    { ISD::SDIV, MVT::v16i32, {  6 } }, // pmuludq sequence
+    { ISD::SREM, MVT::v16i32, {  8 } }, // pmuludq+mul+sub sequence
+    { ISD::UDIV, MVT::v16i32, {  5 } }, // pmuludq sequence
+    { ISD::UREM, MVT::v16i32, {  7 } }, // pmuludq+mul+sub sequence
   };
 
   if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasAVX512())
@@ -369,11 +390,33 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
         return LT.first * KindCost.value();
 
   static const CostKindTblEntry AVX2UniformConstCostTable[] = {
-    { ISD::SHL,  MVT::v32i8, { 2 } }, // psllw + pand.
-    { ISD::SRL,  MVT::v32i8, { 2 } }, // psrlw + pand.
-    { ISD::SRA,  MVT::v32i8, { 4 } }, // psrlw, pand, pxor, psubb.
-
-    { ISD::SRA,  MVT::v4i64, { 4 } }, // 2 x psrad + shuffle.
+    { ISD::SHL,  MVT::v16i8, { 1,  8, 2, 3 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v16i8, { 1,  8, 2, 3 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v16i8, { 2, 10, 5, 6 } }, // psrlw, pand, pxor, psubb.
+    { ISD::SHL,  MVT::v32i8, { 2,  8, 2, 4 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v32i8, { 2,  8, 2, 4 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v32i8, { 3, 10, 5, 9 } }, // psrlw, pand, pxor, psubb.
+
+    { ISD::SHL,  MVT::v8i16, { 1,  1, 1, 1 } }, // psllw
+    { ISD::SRL,  MVT::v8i16, { 1,  1, 1, 1 } }, // psrlw
+    { ISD::SRA,  MVT::v8i16, { 1,  1, 1, 1 } }, // psraw
+    { ISD::SHL,  MVT::v16i16,{ 2,  2, 1, 2 } }, // psllw
+    { ISD::SRL,  MVT::v16i16,{ 2,  2, 1, 2 } }, // psrlw
+    { ISD::SRA,  MVT::v16i16,{ 2,  2, 1, 2 } }, // psraw
+
+    { ISD::SHL,  MVT::v4i32, { 1,  1, 1, 1 } }, // pslld
+    { ISD::SRL,  MVT::v4i32, { 1,  1, 1, 1 } }, // psrld
+    { ISD::SRA,  MVT::v4i32, { 1,  1, 1, 1 } }, // psrad
+    { ISD::SHL,  MVT::v8i32, { 2,  2, 1, 2 } }, // pslld
+    { ISD::SRL,  MVT::v8i32, { 2,  2, 1, 2 } }, // psrld
+    { ISD::SRA,  MVT::v8i32, { 2,  2, 1, 2 } }, // psrad
+
+    { ISD::SHL,  MVT::v2i64, { 1,  1, 1, 1 } }, // psllq
+    { ISD::SRL,  MVT::v2i64, { 1,  1, 1, 1 } }, // psrlq
+    { ISD::SRA,  MVT::v2i64, { 2,  3, 3, 3 } }, // psrad + shuffle.
+    { ISD::SHL,  MVT::v4i64, { 2,  2, 1, 2 } }, // psllq
+    { ISD::SRL,  MVT::v4i64, { 2,  2, 1, 2 } }, // psrlq
+    { ISD::SRA,  MVT::v4i64, { 4,  4, 3, 6 } }, // psrad + shuffle + split.
 
     { ISD::SDIV, MVT::v8i32, { 6 } }, // pmuludq sequence
     { ISD::SREM, MVT::v8i32, { 8 } }, // pmuludq+mul+sub sequence
@@ -387,28 +430,75 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
       if (auto KindCost = Entry->Cost[CostKind])
         return LT.first * KindCost.value();
 
+  static const CostKindTblEntry AVXUniformConstCostTable[] = {
+    { ISD::SHL,  MVT::v16i8, {  2,  7,  2,  3 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v16i8, {  2,  7,  2,  3 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v16i8, {  3,  9,  5,  6 } }, // psrlw, pand, pxor, psubb.
+    { ISD::SHL,  MVT::v32i8, {  4,  7,  7,  8 } }, // 2*(psllw + pand) + split.
+    { ISD::SRL,  MVT::v32i8, {  4,  7,  7,  8 } }, // 2*(psrlw + pand) + split.
+    { ISD::SRA,  MVT::v32i8, {  7,  7, 12, 13 } }, // 2*(psrlw, pand, pxor, psubb) + split.
+
+    { ISD::SHL,  MVT::v8i16, {  1,  2,  1,  1 } }, // psllw.
+    { ISD::SRL,  MVT::v8i16, {  1,  2,  1,  1 } }, // psrlw.
+    { ISD::SRA,  MVT::v8i16, {  1,  2,  1,  1 } }, // psraw.
+    { ISD::SHL,  MVT::v16i16,{  3,  6,  4,  5 } }, // psllw + split.
+    { ISD::SRL,  MVT::v16i16,{  3,  6,  4,  5 } }, // psrlw + split.
+    { ISD::SRA,  MVT::v16i16,{  3,  6,  4,  5 } }, // psraw + split.
+
+    { ISD::SHL,  MVT::v4i32, {  1,  2,  1,  1 } }, // pslld.
+    { ISD::SRL,  MVT::v4i32, {  1,  2,  1,  1 } }, // psrld.
+    { ISD::SRA,  MVT::v4i32, {  1,  2,  1,  1 } }, // psrad.
+    { ISD::SHL,  MVT::v8i32, {  3,  6,  4,  5 } }, // pslld + split.
+    { ISD::SRL,  MVT::v8i32, {  3,  6,  4,  5 } }, // psrld + split.
+    { ISD::SRA,  MVT::v8i32, {  3,  6,  4,  5 } }, // psrad + split.
+
+    { ISD::SHL,  MVT::v2i64, {  1,  2,  1,  1 } }, // psllq.
+    { ISD::SRL,  MVT::v2i64, {  1,  2,  1,  1 } }, // psrlq.
+    { ISD::SRA,  MVT::v2i64, {  2,  3,  3,  3 } }, // psrad + shuffle.
+    { ISD::SHL,  MVT::v4i64, {  3,  6,  4,  5 } }, // 2 x psllq + split.
+    { ISD::SRL,  MVT::v4i64, {  3,  6,  4,  5 } }, // 2 x psllq + split.
+    { ISD::SRA,  MVT::v4i64, {  5,  7,  8,  9 } }, // 2 x psrad + shuffle + split.
+
+    { ISD::SDIV, MVT::v8i32, { 14 } }, // 2*pmuludq sequence + split.
+    { ISD::SREM, MVT::v8i32, { 18 } }, // 2*pmuludq+mul+sub sequence + split.
+    { ISD::UDIV, MVT::v8i32, { 12 } }, // 2*pmuludq sequence + split.
+    { ISD::UREM, MVT::v8i32, { 16 } }, // 2*pmuludq+mul+sub sequence + split.
+  };
+
+  // XOP has faster vXi8 shifts.
+  if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasAVX() &&
+      (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
+    if (const auto *Entry =
+            CostTableLookup(AVXUniformConstCostTable, ISD, LT.second))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * KindCost.value();
+
   static const CostKindTblEntry SSE2UniformConstCostTable[] = {
-    { ISD::SHL,  MVT::v16i8, {    2 } }, // psllw + pand.
-    { ISD::SRL,  MVT::v16i8, {    2 } }, // psrlw + pand.
-    { ISD::SRA,  MVT::v16i8, {    4 } }, // psrlw, pand, pxor, psubb.
-
-    { ISD::SHL,  MVT::v32i8, {  4+2 } }, // 2*(psllw + pand) + split.
-    { ISD::SRL,  MVT::v32i8, {  4+2 } }, // 2*(psrlw + pand) + split.
-    { ISD::SRA,  MVT::v32i8, {  8+2 } }, // 2*(psrlw, pand, pxor, psubb) + split.
-
-    { ISD::SDIV, MVT::v8i32, { 12+2 } }, // 2*pmuludq sequence + split.
-    { ISD::SREM, MVT::v8i32, { 16+2 } }, // 2*pmuludq+mul+sub sequence + split.
-    { ISD::SDIV, MVT::v4i32, {    6 } }, // pmuludq sequence
-    { ISD::SREM, MVT::v4i32, {    8 } }, // pmuludq+mul+sub sequence
-    { ISD::UDIV, MVT::v8i32, { 10+2 } }, // 2*pmuludq sequence + split.
-    { ISD::UREM, MVT::v8i32, { 14+2 } }, // 2*pmuludq+mul+sub sequence + split.
-    { ISD::UDIV, MVT::v4i32, {    5 } }, // pmuludq sequence
-    { ISD::UREM, MVT::v4i32, {    7 } }, // pmuludq+mul+sub sequence
+    { ISD::SHL,  MVT::v16i8, { 1, 7, 2, 3 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v16i8, { 1, 7, 2, 3 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v16i8, { 3, 9, 5, 6 } }, // psrlw, pand, pxor, psubb.
+
+    { ISD::SHL,  MVT::v8i16, { 1, 1, 1, 1 } }, // psllw.
+    { ISD::SRL,  MVT::v8i16, { 1, 1, 1, 1 } }, // psrlw.
+    { ISD::SRA,  MVT::v8i16, { 1, 1, 1, 1 } }, // psraw.
+
+    { ISD::SHL,  MVT::v4i32, { 1, 1, 1, 1 } }, // pslld
+    { ISD::SRL,  MVT::v4i32, { 1, 1, 1, 1 } }, // psrld.
+    { ISD::SRA,  MVT::v4i32, { 1, 1, 1, 1 } }, // psrad.
+
+    { ISD::SHL,  MVT::v2i64, { 1, 1, 1, 1 } }, // psllq.
+    { ISD::SRL,  MVT::v2i64, { 1, 1, 1, 1 } }, // psrlq.
+    { ISD::SRA,  MVT::v2i64, { 3, 5, 6, 6 } }, // 2 x psrad + shuffle.
+
+    { ISD::SDIV, MVT::v4i32, { 6 } }, // pmuludq sequence
+    { ISD::SREM, MVT::v4i32, { 8 } }, // pmuludq+mul+sub sequence
+    { ISD::UDIV, MVT::v4i32, { 5 } }, // pmuludq sequence
+    { ISD::UREM, MVT::v4i32, { 7 } }, // pmuludq+mul+sub sequence
   };
 
   // XOP has faster vXi8 shifts.
   if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasSSE2() &&
-      !ST->hasXOP())
+      (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
     if (const auto *Entry =
             CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
       if (auto KindCost = Entry->Cost[CostKind])
@@ -525,10 +615,44 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
       if (auto KindCost = Entry->Cost[CostKind])
         return LT.first * KindCost.value();
 
+  static const CostKindTblEntry AVX512BWUniformCostTable[] = {
+    { ISD::SHL,  MVT::v16i8,  { 3, 5, 5, 7 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v16i8,  { 3,10, 5, 8 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v16i8,  { 4,12, 8,12 } }, // psrlw, pand, pxor, psubb.
+    { ISD::SHL,  MVT::v32i8,  { 4, 7, 6, 8 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v32i8,  { 4, 8, 7, 9 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v32i8,  { 5,10,10,13 } }, // psrlw, pand, pxor, psubb.
+    { ISD::SHL,  MVT::v64i8,  { 4, 7, 6, 8 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v64i8,  { 4, 8, 7,10 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v64i8,  { 5,10,10,15 } }, // psrlw, pand, pxor, psubb.
+
+    { ISD::SHL,  MVT::v32i16, { 2, 4, 2, 3 } }, // psllw
+    { ISD::SRL,  MVT::v32i16, { 2, 4, 2, 3 } }, // psrlw
+    { ISD::SRA,  MVT::v32i16, { 2, 4, 2, 3 } }, // psrqw
+  };
+
+  if (ST->hasBWI() && Op2Info.isUniform())
+    if (const auto *Entry =
+            CostTableLookup(AVX512BWUniformCostTable, ISD, LT.second))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * KindCost.value();
+
   static const CostKindTblEntry AVX512UniformCostTable[] = {
-    { ISD::SRA,  MVT::v2i64,  { 1 } },
-    { ISD::SRA,  MVT::v4i64,  { 1 } },
-    { ISD::SRA,  MVT::v8i64,  { 1 } },
+    { ISD::SHL,  MVT::v32i16, { 5,10, 5, 7 } }, // psllw + split.
+    { ISD::SRL,  MVT::v32i16, { 5,10, 5, 7 } }, // psrlw + split.
+    { ISD::SRA,  MVT::v32i16, { 5,10, 5, 7 } }, // psraw + split.
+
+    { ISD::SHL,  MVT::v16i32, { 2, 4, 2, 3 } }, // pslld
+    { ISD::SRL,  MVT::v16i32, { 2, 4, 2, 3 } }, // psrld
+    { ISD::SRA,  MVT::v16i32, { 2, 4, 2, 3 } }, // psrad
+
+    { ISD::SRA,  MVT::v2i64,  { 1, 2, 1, 2 } }, // psraq
+    { ISD::SHL,  MVT::v4i64,  { 1, 4, 1, 2 } }, // psllq
+    { ISD::SRL,  MVT::v4i64,  { 1, 4, 1, 2 } }, // psrlq
+    { ISD::SRA,  MVT::v4i64,  { 1, 4, 1, 2 } }, // psraq
+    { ISD::SHL,  MVT::v8i64,  { 1, 4, 1, 2 } }, // psllq
+    { ISD::SRL,  MVT::v8i64,  { 1, 4, 1, 2 } }, // psrlq
+    { ISD::SRA,  MVT::v8i64,  { 1, 4, 1, 2 } }, // psraq
   };
 
   if (ST->hasAVX512() && Op2Info.isUniform())
@@ -539,18 +663,33 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
 
   static const CostKindTblEntry AVX2UniformCostTable[] = {
     // Uniform splats are cheaper for the following instructions.
-    { ISD::SHL,  MVT::v16i16, { 1 } }, // psllw.
-    { ISD::SRL,  MVT::v16i16, { 1 } }, // psrlw.
-    { ISD::SRA,  MVT::v16i16, { 1 } }, // psraw.
-    { ISD::SHL,  MVT::v32i16, { 2 } }, // 2*psllw.
-    { ISD::SRL,  MVT::v32i16, { 2 } }, // 2*psrlw.
-    { ISD::SRA,  MVT::v32i16, { 2 } }, // 2*psraw.
-
-    { ISD::SHL,  MVT::v8i32,  { 1 } }, // pslld
-    { ISD::SRL,  MVT::v8i32,  { 1 } }, // psrld
-    { ISD::SRA,  MVT::v8i32,  { 1 } }, // psrad
-    { ISD::SHL,  MVT::v4i64,  { 1 } }, // psllq
-    { ISD::SRL,  MVT::v4i64,  { 1 } }, // psrlq
+    { ISD::SHL,  MVT::v16i8,  { 3, 5, 5, 7 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v16i8,  { 3, 9, 5, 8 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v16i8,  { 4, 5, 9,13 } }, // psrlw, pand, pxor, psubb.
+    { ISD::SHL,  MVT::v32i8,  { 4, 7, 6, 8 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v32i8,  { 4, 8, 7, 9 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v32i8,  { 6, 9,11,16 } }, // psrlw, pand, pxor, psubb.
+
+    { ISD::SHL,  MVT::v8i16,  { 1, 2, 1, 2 } }, // psllw.
+    { ISD::SRL,  MVT::v8i16,  { 1, 2, 1, 2 } }, // psrlw.
+    { ISD::SRA,  MVT::v8i16,  { 1, 2, 1, 2 } }, // psraw.
+    { ISD::SHL,  MVT::v16i16, { 2, 4, 2, 3 } }, // psllw.
+    { ISD::SRL,  MVT::v16i16, { 2, 4, 2, 3 } }, // psrlw.
+    { ISD::SRA,  MVT::v16i16, { 2, 4, 2, 3 } }, // psraw.
+
+    { ISD::SHL,  MVT::v4i32,  { 1, 2, 1, 2 } }, // pslld
+    { ISD::SRL,  MVT::v4i32,  { 1, 2, 1, 2 } }, // psrld
+    { ISD::SRA,  MVT::v4i32,  { 1, 2, 1, 2 } }, // psrad
+    { ISD::SHL,  MVT::v8i32,  { 2, 4, 2, 3 } }, // pslld
+    { ISD::SRL,  MVT::v8i32,  { 2, 4, 2, 3 } }, // psrld
+    { ISD::SRA,  MVT::v8i32,  { 2, 4, 2, 3 } }, // psrad
+
+    { ISD::SHL,  MVT::v2i64,  { 1, 2, 1, 2 } }, // psllq
+    { ISD::SRL,  MVT::v2i64,  { 1, 2, 1, 2 } }, // psrlq
+    { ISD::SRA,  MVT::v2i64,  { 2, 4, 5, 7 } }, // 2 x psrad + shuffle.
+    { ISD::SHL,  MVT::v4i64,  { 2, 4, 1, 2 } }, // psllq
+    { ISD::SRL,  MVT::v4i64,  { 2, 4, 1, 2 } }, // psrlq
+    { ISD::SRA,  MVT::v4i64,  { 4, 6, 5, 9 } }, // 2 x psrad + shuffle.
     { ISD::SRA,  MVT::v4i64,  { 4 } }, // 2*psrad + shuffle.
   };
 
@@ -560,21 +699,65 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
       if (auto KindCost = Entry->Cost[CostKind])
         return LT.first * KindCost.value();
 
+  static const CostKindTblEntry AVXUniformCostTable[] = {
+    { ISD::SHL,  MVT::v16i8,  {  4, 4, 6, 8 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v16i8,  {  4, 8, 5, 8 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v16i8,  {  6, 6, 9,13 } }, // psrlw, pand, pxor, psubb.
+    { ISD::SHL,  MVT::v32i8,  {  7, 8,11,14 } }, // psllw + pand + split.
+    { ISD::SRL,  MVT::v32i8,  {  7, 9,10,14 } }, // psrlw + pand + split.
+    { ISD::SRA,  MVT::v32i8,  { 10,11,16,21 } }, // psrlw, pand, pxor, psubb + split.
+
+    { ISD::SHL,  MVT::v8i16,  {  1, 3, 1, 2 } }, // psllw.
+    { ISD::SRL,  MVT::v8i16,  {  1, 3, 1, 2 } }, // psrlw.
+    { ISD::SRA,  MVT::v8i16,  {  1, 3, 1, 2 } }, // psraw.
+    { ISD::SHL,  MVT::v16i16, {  3, 7, 5, 7 } }, // psllw + split.
+    { ISD::SRL,  MVT::v16i16, {  3, 7, 5, 7 } }, // psrlw + split.
+    { ISD::SRA,  MVT::v16i16, {  3, 7, 5, 7 } }, // psraw + split.
+
+    { ISD::SHL,  MVT::v4i32,  {  1, 3, 1, 2 } }, // pslld.
+    { ISD::SRL,  MVT::v4i32,  {  1, 3, 1, 2 } }, // psrld.
+    { ISD::SRA,  MVT::v4i32,  {  1, 3, 1, 2 } }, // psrad.
+    { ISD::SHL,  MVT::v8i32,  {  3, 7, 5, 7 } }, // pslld + split.
+    { ISD::SRL,  MVT::v8i32,  {  3, 7, 5, 7 } }, // psrld + split.
+    { ISD::SRA,  MVT::v8i32,  {  3, 7, 5, 7 } }, // psrad + split.
+
+    { ISD::SHL,  MVT::v2i64,  {  1, 3, 1, 2 } }, // psllq.
+    { ISD::SRL,  MVT::v2i64,  {  1, 3, 1, 2 } }, // psrlq.
+    { ISD::SRA,  MVT::v2i64,  {  3, 4, 5, 7 } }, // 2 x psrad + shuffle.
+    { ISD::SHL,  MVT::v4i64,  {  3, 7, 4, 6 } }, // psllq + split.
+    { ISD::SRL,  MVT::v4i64,  {  3, 7, 4, 6 } }, // psrlq + split.
+    { ISD::SRA,  MVT::v4i64,  {  6, 7,10,13 } }, // 2 x (2 x psrad + shuffle) + split.
+  };
+
+  // XOP has faster vXi8 shifts.
+  if (ST->hasAVX() && Op2Info.isUniform() &&
+      (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
+    if (const auto *Entry =
+            CostTableLookup(AVXUniformCostTable, ISD, LT.second))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * KindCost.value();
+
   static const CostKindTblEntry SSE2UniformCostTable[] = {
     // Uniform splats are cheaper for the following instructions.
-    { ISD::SHL,  MVT::v8i16,  { 1 } }, // psllw.
-    { ISD::SHL,  MVT::v4i32,  { 1 } }, // pslld
-    { ISD::SHL,  MVT::v2i64,  { 1 } }, // psllq.
+    { ISD::SHL,  MVT::v16i8, {  9, 10, 6, 9 } }, // psllw + pand.
+    { ISD::SRL,  MVT::v16i8, {  9, 13, 5, 9 } }, // psrlw + pand.
+    { ISD::SRA,  MVT::v16i8, { 11, 15, 9,13 } }, // pcmpgtb sequence.
 
-    { ISD::SRL,  MVT::v8i16,  { 1 } }, // psrlw.
-    { ISD::SRL,  MVT::v4i32,  { 1 } }, // psrld.
-    { ISD::SRL,  MVT::v2i64,  { 1 } }, // psrlq.
+    { ISD::SHL,  MVT::v8i16, {  2, 2, 1, 2 } }, // psllw.
+    { ISD::SRL,  MVT::v8i16, {  2, 2, 1, 2 } }, // psrlw.
+    { ISD::SRA,  MVT::v8i16, {  2, 2, 1, 2 } }, // psraw.
 
-    { ISD::SRA,  MVT::v8i16,  { 1 } }, // psraw.
-    { ISD::SRA,  MVT::v4i32,  { 1 } }, // psrad.
+    { ISD::SHL,  MVT::v4i32, {  2, 2, 1, 2 } }, // pslld
+    { ISD::SRL,  MVT::v4i32, {  2, 2, 1, 2 } }, // psrld.
+    { ISD::SRA,  MVT::v4i32, {  2, 2, 1, 2 } }, // psrad.
+
+    { ISD::SHL,  MVT::v2i64, {  2, 2, 1, 2 } }, // psllq.
+    { ISD::SRL,  MVT::v2i64, {  2, 2, 1, 2 } }, // psrlq.
+    { ISD::SRA,  MVT::v2i64, {  5, 9, 5, 7 } }, // 2*psrlq + xor + sub.
   };
 
-  if (ST->hasSSE2() && Op2Info.isUniform())
+  if (ST->hasSSE2() && Op2Info.isUniform() &&
+      (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
     if (const auto *Entry =
             CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
       if (auto KindCost = Entry->Cost[CostKind])

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-fix.ll b/llvm/test/Analysis/CostModel/X86/arith-fix.ll
index 1febfee669aba..31ca29a997d04 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fix.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fix.ll
@@ -46,9 +46,9 @@ define i32 @smul(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'smul'
@@ -65,45 +65,45 @@ define i32 @smul(i32 %arg) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'smul'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'smul'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -119,12 +119,12 @@ define i32 @smul(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'smul'
@@ -141,9 +141,9 @@ define i32 @smul(i32 %arg) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'smul'
@@ -157,12 +157,12 @@ define i32 @smul(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'smul'
@@ -179,9 +179,9 @@ define i32 @smul(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'smul'
@@ -198,28 +198,28 @@ define i32 @smul(i32 %arg) {
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'smul'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
@@ -280,9 +280,9 @@ define i32 @umul(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'umul'
@@ -299,45 +299,45 @@ define i32 @umul(i32 %arg) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'umul'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'umul'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -353,12 +353,12 @@ define i32 @umul(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'umul'
@@ -375,9 +375,9 @@ define i32 @umul(i32 %arg) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'umul'
@@ -391,12 +391,12 @@ define i32 @umul(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'umul'
@@ -413,9 +413,9 @@ define i32 @umul(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'umul'
@@ -432,28 +432,28 @@ define i32 @umul(i32 %arg) {
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'umul'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index 8ec7d48ae74dc..c282c4ae7b672 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -990,9 +990,9 @@ declare {<64 x i8>, <64 x i1>}  @llvm.smul.with.overflow.v64i8(<64 x i8>, <64 x
 define i32 @smul(i32 %arg) {
 ; SSSE3-LABEL: 'smul'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -1002,16 +1002,16 @@ define i32 @smul(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'smul'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -1021,47 +1021,47 @@ define i32 @smul(i32 %arg) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'smul'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'smul'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'smul'
@@ -1075,12 +1075,12 @@ define i32 @smul(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'smul'
@@ -1097,9 +1097,9 @@ define i32 @smul(i32 %arg) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'smul'
@@ -1113,19 +1113,19 @@ define i32 @smul(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'smul'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -1135,16 +1135,16 @@ define i32 @smul(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'smul'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -1154,28 +1154,28 @@ define i32 @smul(i32 %arg) {
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'smul'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
@@ -1270,17 +1270,17 @@ define i32 @umul(i32 %arg) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 122 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'umul'
@@ -1289,17 +1289,17 @@ define i32 @umul(i32 %arg) {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'umul'
@@ -1316,9 +1316,9 @@ define i32 @umul(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'umul'
@@ -1354,9 +1354,9 @@ define i32 @umul(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'umul'
@@ -1403,17 +1403,17 @@ define i32 @umul(i32 %arg) {
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 122 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/div-codesize.ll b/llvm/test/Analysis/CostModel/X86/div-codesize.ll
index f5045a54b68e7..0966b5238bcbb 100644
--- a/llvm/test/Analysis/CostModel/X86/div-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/div-codesize.ll
@@ -434,59 +434,59 @@ define i32 @udiv_constpow2() {
 define i32 @sdiv_uniformconstpow2() {
 ; SSE-LABEL: 'sdiv_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'sdiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'sdiv_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'sdiv_uniformconstpow2'
@@ -500,12 +500,12 @@ define i32 @sdiv_uniformconstpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'sdiv_uniformconstpow2'
@@ -522,9 +522,9 @@ define i32 @sdiv_uniformconstpow2() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 16
@@ -551,24 +551,100 @@ define i32 @sdiv_uniformconstpow2() {
 }
 
 define i32 @udiv_uniformconstpow2() {
-; CHECK-LABEL: 'udiv_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE-LABEL: 'udiv_uniformconstpow2'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'udiv_uniformconstpow2'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'udiv_uniformconstpow2'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'udiv_uniformconstpow2'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'udiv_uniformconstpow2'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 16
   %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>

diff  --git a/llvm/test/Analysis/CostModel/X86/div-latency.ll b/llvm/test/Analysis/CostModel/X86/div-latency.ll
index d67fcc0b380da..f891c7c4dc578 100644
--- a/llvm/test/Analysis/CostModel/X86/div-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/div-latency.ll
@@ -453,59 +453,59 @@ define i32 @udiv_constpow2() {
 define i32 @sdiv_uniformconstpow2() {
 ; SSE-LABEL: 'sdiv_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'sdiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'sdiv_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'sdiv_uniformconstpow2'
@@ -519,12 +519,12 @@ define i32 @sdiv_uniformconstpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'sdiv_uniformconstpow2'
@@ -541,28 +541,28 @@ define i32 @sdiv_uniformconstpow2() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'sdiv_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 16
@@ -589,24 +589,119 @@ define i32 @sdiv_uniformconstpow2() {
 }
 
 define i32 @udiv_uniformconstpow2() {
-; CHECK-LABEL: 'udiv_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE-LABEL: 'udiv_uniformconstpow2'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'udiv_uniformconstpow2'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'udiv_uniformconstpow2'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'udiv_uniformconstpow2'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'udiv_uniformconstpow2'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SLM-LABEL: 'udiv_uniformconstpow2'
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 16
   %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>

diff  --git a/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll
index 8cb917760ae16..2eaff0fa55602 100644
--- a/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll
@@ -434,59 +434,59 @@ define i32 @udiv_constpow2() {
 define i32 @sdiv_uniformconstpow2() {
 ; SSE-LABEL: 'sdiv_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'sdiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'sdiv_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'sdiv_uniformconstpow2'
@@ -500,12 +500,12 @@ define i32 @sdiv_uniformconstpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'sdiv_uniformconstpow2'
@@ -522,9 +522,9 @@ define i32 @sdiv_uniformconstpow2() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 16
@@ -551,24 +551,100 @@ define i32 @sdiv_uniformconstpow2() {
 }
 
 define i32 @udiv_uniformconstpow2() {
-; CHECK-LABEL: 'udiv_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE-LABEL: 'udiv_uniformconstpow2'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'udiv_uniformconstpow2'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'udiv_uniformconstpow2'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'udiv_uniformconstpow2'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'udiv_uniformconstpow2'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 16
   %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>

diff  --git a/llvm/test/Analysis/CostModel/X86/div.ll b/llvm/test/Analysis/CostModel/X86/div.ll
index 4ef5e95c9bc66..75d03f518b748 100644
--- a/llvm/test/Analysis/CostModel/X86/div.ll
+++ b/llvm/test/Analysis/CostModel/X86/div.ll
@@ -1099,9 +1099,9 @@ define i32 @udiv_constpow2() {
 define i32 @sdiv_uniformconstpow2() {
 ; SSE2-LABEL: 'sdiv_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -1111,16 +1111,16 @@ define i32 @sdiv_uniformconstpow2() {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'sdiv_uniformconstpow2'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -1130,16 +1130,16 @@ define i32 @sdiv_uniformconstpow2() {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'sdiv_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -1149,47 +1149,47 @@ define i32 @sdiv_uniformconstpow2() {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'sdiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'sdiv_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'sdiv_uniformconstpow2'
@@ -1203,12 +1203,12 @@ define i32 @sdiv_uniformconstpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'sdiv_uniformconstpow2'
@@ -1225,16 +1225,16 @@ define i32 @sdiv_uniformconstpow2() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'sdiv_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -1244,28 +1244,28 @@ define i32 @sdiv_uniformconstpow2() {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'sdiv_uniformconstpow2'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 16
@@ -1306,45 +1306,45 @@ define i32 @udiv_uniformconstpow2() {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'udiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'udiv_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -1360,12 +1360,12 @@ define i32 @udiv_uniformconstpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'udiv_uniformconstpow2'
@@ -1382,28 +1382,28 @@ define i32 @udiv_uniformconstpow2() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'udiv_uniformconstpow2'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 16

diff  --git a/llvm/test/Analysis/CostModel/X86/fshl.ll b/llvm/test/Analysis/CostModel/X86/fshl.ll
index 011b77f94d2a7..eae376c170261 100644
--- a/llvm/test/Analysis/CostModel/X86/fshl.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshl.ll
@@ -312,18 +312,18 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatvar_funnel_i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_funnel_i64'
@@ -331,8 +331,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_funnel_i64'
@@ -340,8 +340,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatvar_funnel_i64'
@@ -357,18 +357,18 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_funnel_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_funnel_i64'
@@ -376,8 +376,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -394,9 +394,9 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_funnel_i32'
@@ -404,8 +404,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_funnel_i32'
@@ -413,8 +413,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatvar_funnel_i32'
@@ -422,26 +422,26 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_funnel_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_funnel_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_funnel_i32'
@@ -449,8 +449,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -467,9 +467,9 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_funnel_i16'
@@ -477,8 +477,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_funnel_i16'
@@ -486,8 +486,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatvar_funnel_i16'
@@ -495,8 +495,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatvar_funnel_i16'
@@ -504,7 +504,7 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -513,26 +513,26 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_funnel_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_funnel_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_funnel_i16'
@@ -540,8 +540,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -554,48 +554,39 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 }
 
 define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) {
-; SSSE3-LABEL: 'splatvar_funnel_i8'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; SSE42-LABEL: 'splatvar_funnel_i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE-LABEL: 'splatvar_funnel_i8'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_funnel_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatvar_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -603,17 +594,17 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatvar_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -621,18 +612,18 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_funnel_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_funnel_i8'
@@ -956,15 +947,15 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 ; AVX1-LABEL: 'splatconstant_funnel_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_funnel_i64'
@@ -991,8 +982,8 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 ; XOP-LABEL: 'splatconstant_funnel_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
@@ -1013,15 +1004,15 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 ; AVX1-LABEL: 'splatconstant_funnel_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_funnel_i32'
@@ -1048,8 +1039,8 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 ; XOP-LABEL: 'splatconstant_funnel_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
@@ -1070,22 +1061,22 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; AVX1-LABEL: 'splatconstant_funnel_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i16'
@@ -1098,8 +1089,8 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i16'
@@ -1119,8 +1110,8 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; XOP-LABEL: 'splatconstant_funnel_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
@@ -1133,58 +1124,58 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i8'
@@ -1491,9 +1482,9 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_rotate_i64'
@@ -1501,8 +1492,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_rotate_i64'
@@ -1510,8 +1501,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatvar_rotate_i64'
@@ -1527,18 +1518,18 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_rotate_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_rotate_i64'
@@ -1564,9 +1555,9 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_rotate_i32'
@@ -1574,8 +1565,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_rotate_i32'
@@ -1583,8 +1574,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatvar_rotate_i32'
@@ -1600,18 +1591,18 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_rotate_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_rotate_i32'
@@ -1637,9 +1628,9 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_rotate_i16'
@@ -1647,8 +1638,8 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_rotate_i16'
@@ -1656,8 +1647,8 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatvar_rotate_i16'
@@ -1665,8 +1656,8 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatvar_rotate_i16'
@@ -1683,26 +1674,26 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_rotate_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_rotate_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_rotate_i16'
@@ -1724,48 +1715,39 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 }
 
 define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) {
-; SSSE3-LABEL: 'splatvar_rotate_i8'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; SSE42-LABEL: 'splatvar_rotate_i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE-LABEL: 'splatvar_rotate_i8'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_rotate_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatvar_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -1782,8 +1764,8 @@ define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -1791,18 +1773,18 @@ define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_rotate_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_rotate_i8'
@@ -2112,15 +2094,15 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 ; AVX1-LABEL: 'splatconstant_rotate_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i64'
@@ -2169,15 +2151,15 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 ; AVX1-LABEL: 'splatconstant_rotate_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i32'
@@ -2226,22 +2208,22 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; AVX1-LABEL: 'splatconstant_rotate_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i16'
@@ -2254,8 +2236,8 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i16'
@@ -2289,30 +2271,30 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i8'
@@ -2324,23 +2306,23 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll
index f9237e09f6700..0d6f4d49886ad 100644
--- a/llvm/test/Analysis/CostModel/X86/fshr.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshr.ll
@@ -312,18 +312,18 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatvar_funnel_i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_funnel_i64'
@@ -331,8 +331,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_funnel_i64'
@@ -340,8 +340,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatvar_funnel_i64'
@@ -357,18 +357,18 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_funnel_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_funnel_i64'
@@ -376,8 +376,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -394,9 +394,9 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_funnel_i32'
@@ -404,8 +404,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_funnel_i32'
@@ -413,8 +413,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatvar_funnel_i32'
@@ -422,26 +422,26 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_funnel_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_funnel_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_funnel_i32'
@@ -449,8 +449,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -467,9 +467,9 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_funnel_i16'
@@ -477,8 +477,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_funnel_i16'
@@ -486,8 +486,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatvar_funnel_i16'
@@ -495,8 +495,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatvar_funnel_i16'
@@ -504,7 +504,7 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -513,26 +513,26 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_funnel_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_funnel_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_funnel_i16'
@@ -540,8 +540,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -554,48 +554,39 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 }
 
 define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) {
-; SSSE3-LABEL: 'splatvar_funnel_i8'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; SSE42-LABEL: 'splatvar_funnel_i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE-LABEL: 'splatvar_funnel_i8'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_funnel_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatvar_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -603,17 +594,17 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatvar_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -621,18 +612,18 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_funnel_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_funnel_i8'
@@ -956,15 +947,15 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 ; AVX1-LABEL: 'splatconstant_funnel_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_funnel_i64'
@@ -991,8 +982,8 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 ; XOP-LABEL: 'splatconstant_funnel_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
@@ -1013,15 +1004,15 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 ; AVX1-LABEL: 'splatconstant_funnel_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_funnel_i32'
@@ -1048,8 +1039,8 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 ; XOP-LABEL: 'splatconstant_funnel_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
@@ -1070,22 +1061,22 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; AVX1-LABEL: 'splatconstant_funnel_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i16'
@@ -1098,8 +1089,8 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i16'
@@ -1119,8 +1110,8 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; XOP-LABEL: 'splatconstant_funnel_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
@@ -1133,58 +1124,58 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i8'
@@ -1492,9 +1483,9 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_rotate_i64'
@@ -1502,8 +1493,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_rotate_i64'
@@ -1511,8 +1502,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatvar_rotate_i64'
@@ -1528,18 +1519,18 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_rotate_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_rotate_i64'
@@ -1565,9 +1556,9 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_rotate_i32'
@@ -1575,8 +1566,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_rotate_i32'
@@ -1584,8 +1575,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatvar_rotate_i32'
@@ -1601,18 +1592,18 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_rotate_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_rotate_i32'
@@ -1638,9 +1629,9 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_rotate_i16'
@@ -1648,8 +1639,8 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_rotate_i16'
@@ -1657,8 +1648,8 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatvar_rotate_i16'
@@ -1666,8 +1657,8 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatvar_rotate_i16'
@@ -1684,26 +1675,26 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_rotate_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_rotate_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_rotate_i16'
@@ -1725,48 +1716,39 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 }
 
 define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) {
-; SSSE3-LABEL: 'splatvar_rotate_i8'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; SSE42-LABEL: 'splatvar_rotate_i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE-LABEL: 'splatvar_rotate_i8'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatvar_rotate_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatvar_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatvar_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -1783,8 +1765,8 @@ define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -1792,18 +1774,18 @@ define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatvar_rotate_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
-; GLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatvar_rotate_i8'
@@ -2113,15 +2095,15 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 ; AVX1-LABEL: 'splatconstant_rotate_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i64'
@@ -2170,15 +2152,15 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 ; AVX1-LABEL: 'splatconstant_rotate_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i32'
@@ -2227,22 +2209,22 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; AVX1-LABEL: 'splatconstant_rotate_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i16'
@@ -2255,8 +2237,8 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i16'
@@ -2290,30 +2272,30 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i8'
@@ -2325,23 +2307,23 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
index 887309a317e2a..155d509b37126 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
@@ -56,17 +56,17 @@ define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
 ;
 ; LATE-LABEL: 'umul'
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
-; LATE-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 93 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'umul'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE_LATE-LABEL: 'umul'
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %s = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)

diff  --git a/llvm/test/Analysis/CostModel/X86/mul.ll b/llvm/test/Analysis/CostModel/X86/mul.ll
index 05033916259cb..4639b5dbde49b 100644
--- a/llvm/test/Analysis/CostModel/X86/mul.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul.ll
@@ -248,39 +248,39 @@ define i32 @mul_uniformconstpow2() {
 ; AVX1-LABEL: 'mul_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'mul_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'mul_uniformconstpow2'
@@ -294,12 +294,12 @@ define i32 @mul_uniformconstpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'mul_uniformconstpow2'

diff  --git a/llvm/test/Analysis/CostModel/X86/rem-codesize.ll b/llvm/test/Analysis/CostModel/X86/rem-codesize.ll
index a80e68a9a175b..01b3dc1aade48 100644
--- a/llvm/test/Analysis/CostModel/X86/rem-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem-codesize.ll
@@ -510,78 +510,78 @@ define i32 @urem_constpow2() {
 define i32 @srem_uniformconstpow2() {
 ; SSE2-LABEL: 'srem_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'srem_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'srem_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'srem_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_uniformconstpow2'
@@ -595,12 +595,12 @@ define i32 @srem_uniformconstpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_uniformconstpow2'
@@ -617,28 +617,28 @@ define i32 @srem_uniformconstpow2() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'srem_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 16

diff  --git a/llvm/test/Analysis/CostModel/X86/rem-latency.ll b/llvm/test/Analysis/CostModel/X86/rem-latency.ll
index f1bb53185d8b0..207ac8616a1c7 100644
--- a/llvm/test/Analysis/CostModel/X86/rem-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem-latency.ll
@@ -510,78 +510,78 @@ define i32 @urem_constpow2() {
 define i32 @srem_uniformconstpow2() {
 ; SSE2-LABEL: 'srem_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'srem_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'srem_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'srem_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_uniformconstpow2'
@@ -595,12 +595,12 @@ define i32 @srem_uniformconstpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_uniformconstpow2'
@@ -617,28 +617,28 @@ define i32 @srem_uniformconstpow2() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'srem_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 107 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 148 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 16

diff  --git a/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll
index 3b3753cd1d5f7..e7768fa517d74 100644
--- a/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll
@@ -510,78 +510,78 @@ define i32 @urem_constpow2() {
 define i32 @srem_uniformconstpow2() {
 ; SSE2-LABEL: 'srem_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'srem_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'srem_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'srem_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_uniformconstpow2'
@@ -595,12 +595,12 @@ define i32 @srem_uniformconstpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_uniformconstpow2'
@@ -617,28 +617,28 @@ define i32 @srem_uniformconstpow2() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'srem_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 16

diff  --git a/llvm/test/Analysis/CostModel/X86/rem.ll b/llvm/test/Analysis/CostModel/X86/rem.ll
index 2154e971fc7da..a9e8815a7fef8 100644
--- a/llvm/test/Analysis/CostModel/X86/rem.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem.ll
@@ -1099,9 +1099,9 @@ define i32 @urem_constpow2() {
 define i32 @srem_uniformconstpow2() {
 ; SSE2-LABEL: 'srem_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -1111,16 +1111,16 @@ define i32 @srem_uniformconstpow2() {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'srem_uniformconstpow2'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -1130,16 +1130,16 @@ define i32 @srem_uniformconstpow2() {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'srem_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -1149,47 +1149,47 @@ define i32 @srem_uniformconstpow2() {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'srem_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'srem_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_uniformconstpow2'
@@ -1203,12 +1203,12 @@ define i32 @srem_uniformconstpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_uniformconstpow2'
@@ -1225,16 +1225,16 @@ define i32 @srem_uniformconstpow2() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'srem_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -1244,16 +1244,16 @@ define i32 @srem_uniformconstpow2() {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'srem_uniformconstpow2'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; GLM-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; GLM-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; GLM-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -1263,28 +1263,28 @@ define i32 @srem_uniformconstpow2() {
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; GLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; GLM-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'srem_uniformconstpow2'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 16

diff  --git a/llvm/test/Analysis/CostModel/X86/uniformshift-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/uniformshift-inseltpoison.ll
index 2020da369549d..2cad2e6302d94 100644
--- a/llvm/test/Analysis/CostModel/X86/uniformshift-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/uniformshift-inseltpoison.ll
@@ -4,7 +4,7 @@
 define <4 x i32> @shl(<4 x i32> %vector, i32 %scalar) {
 entry:
   ; SSE2: 'shl'
-  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2: cost of 2 {{.*}} shl
   ; SSE2-CODEGEN: movd  %edi, %xmm1
   ; SSE2-CODEGEN: pslld %xmm1, %xmm0
   %insert = insertelement <4 x i32> poison, i32 %scalar, i32 0
@@ -16,7 +16,7 @@ entry:
 define <4 x i32> @ashr(<4 x i32> %vector, i32 %scalar) {
 entry:
   ; SSE2: 'ashr'
-  ; SSE2: cost of 1 {{.*}} ashr
+  ; SSE2: cost of 2 {{.*}} ashr
   ; SSE2-CODEGEN: movd  %edi, %xmm1
   ; SSE2-CODEGEN: psrad %xmm1, %xmm0
   %insert = insertelement <4 x i32> poison, i32 %scalar, i32 0
@@ -28,7 +28,7 @@ entry:
 define <4 x i32> @lshr(<4 x i32> %vector, i32 %scalar) {
 entry:
   ; SSE2: 'lshr'
-  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2: cost of 2 {{.*}} lshr
   ; SSE2-CODEGEN: movd  %edi, %xmm1
   ; SSE2-CODEGEN: psrld %xmm1, %xmm0
   %insert = insertelement <4 x i32> poison, i32 %scalar, i32 0

diff  --git a/llvm/test/Analysis/CostModel/X86/uniformshift.ll b/llvm/test/Analysis/CostModel/X86/uniformshift.ll
index 8b68886cc9c7a..e9c2e2aabf9ec 100644
--- a/llvm/test/Analysis/CostModel/X86/uniformshift.ll
+++ b/llvm/test/Analysis/CostModel/X86/uniformshift.ll
@@ -4,7 +4,7 @@
 define <4 x i32> @shl(<4 x i32> %vector, i32 %scalar) {
 entry:
   ; SSE2: 'shl'
-  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2: cost of 2 {{.*}} shl
   ; SSE2-CODEGEN: movd  %edi, %xmm1
   ; SSE2-CODEGEN: pslld %xmm1, %xmm0
   %insert = insertelement <4 x i32> undef, i32 %scalar, i32 0
@@ -16,7 +16,7 @@ entry:
 define <4 x i32> @ashr(<4 x i32> %vector, i32 %scalar) {
 entry:
   ; SSE2: 'ashr'
-  ; SSE2: cost of 1 {{.*}} ashr
+  ; SSE2: cost of 2 {{.*}} ashr
   ; SSE2-CODEGEN: movd  %edi, %xmm1
   ; SSE2-CODEGEN: psrad %xmm1, %xmm0
   %insert = insertelement <4 x i32> undef, i32 %scalar, i32 0
@@ -28,7 +28,7 @@ entry:
 define <4 x i32> @lshr(<4 x i32> %vector, i32 %scalar) {
 entry:
   ; SSE2: 'lshr'
-  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2: cost of 2 {{.*}} lshr
   ; SSE2-CODEGEN: movd  %edi, %xmm1
   ; SSE2-CODEGEN: psrld %xmm1, %xmm0
   %insert = insertelement <4 x i32> undef, i32 %scalar, i32 0

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll
index d6758e9e25296..2e92fa26f50d1 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll
@@ -5,12 +5,12 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
 ;
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42
@@ -138,25 +138,25 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v2i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v2i64'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v2i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v2i64'
@@ -175,37 +175,37 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i64'
@@ -217,7 +217,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v4i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -230,37 +230,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i64'
@@ -272,7 +272,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v8i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -322,49 +322,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -377,49 +377,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -469,49 +469,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> undef, i16 %b, i32 0
@@ -524,49 +524,55 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> undef, i16 %b, i32 0
@@ -579,32 +585,44 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v16i8'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512F-LABEL: 'splatvar_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -616,25 +634,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -646,19 +664,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> undef, i8 %b, i32 0
@@ -671,25 +695,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -701,19 +725,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v64i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> undef, i8 %b, i32 0
@@ -839,27 +869,99 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ;
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v2i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v2i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v2i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX-LABEL: 'splatconstant_shift_v2i64'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; XOP-LABEL: 'splatconstant_shift_v2i64'
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v2i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = ashr <2 x i64> %a, <i64 7, i64 7>
   ret <2 x i64> %shift
 }
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v4i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v4i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v4i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v4i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v4i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v4i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v4i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %shift
 }
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
   ret <8 x i64> %shift
@@ -875,18 +977,74 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 }
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i32> %shift
 }
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v16i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <16 x i32> %shift
@@ -902,45 +1060,197 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 }
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <16 x i16> %shift
 }
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v32i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <32 x i16> %shift
 }
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX-LABEL: 'splatconstant_shift_v16i8'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
 }
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v32i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v64i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v64i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v64i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v64i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v64i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v64i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
index 2bcadd06761af..78d03b5430a87 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
@@ -482,32 +482,50 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v2i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+; AVX1-LABEL: 'splatvar_shift_v2i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
-; XOP-LABEL: 'splatvar_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+; AVX2-LABEL: 'splatvar_shift_v2i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v2i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v2i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v2i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+;
+; BTVER2-LABEL: 'splatvar_shift_v2i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
@@ -519,19 +537,19 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
@@ -561,7 +579,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v4i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> poison, i64 %b, i32 0
@@ -574,19 +592,19 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
@@ -616,7 +634,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v8i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> poison, i64 %b, i32 0
@@ -629,13 +647,13 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
@@ -666,49 +684,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> poison, i32 %b, i32 0
@@ -721,49 +739,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> poison, i32 %b, i32 0
@@ -776,13 +794,13 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v8i16'
@@ -813,49 +831,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> poison, i16 %b, i32 0
@@ -868,49 +886,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v32i16'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> poison, i16 %b, i32 0
@@ -923,61 +959,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %shift = ashr <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512F-LABEL: 'splatvar_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512BW-LABEL: 'splatvar_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512VL-LABEL: 'splatvar_shift_v16i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512-LABEL: 'splatvar_shift_v16i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> poison, i8 %b, i32 0
@@ -990,25 +1014,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %shift = ashr <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %shift = ashr <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -1026,31 +1050,31 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; AVX512F-LABEL: 'splatvar_shift_v32i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatvar_shift_v32i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v32i8'
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <32 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> poison, i8 %b, i32 0
@@ -1063,25 +1087,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %shift = ashr <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %shift = ashr <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -1105,7 +1129,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512BW-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v64i8'
@@ -1117,13 +1141,13 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %shift = ashr <64 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> poison, i8 %b, i32 0
@@ -1594,15 +1618,15 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v2i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatconstant_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v2i64'
@@ -1615,27 +1639,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v4i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
-; XOP-LABEL: 'splatconstant_shift_v4i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v4i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1644,27 +1672,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
-; XOP-LABEL: 'splatconstant_shift_v8i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v8i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1686,19 +1718,19 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
@@ -1706,7 +1738,7 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v8i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1719,19 +1751,19 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
@@ -1739,7 +1771,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v16i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1761,27 +1793,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v16i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v16i16'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v16i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1794,23 +1838,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
@@ -1818,7 +1862,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16'
@@ -1826,7 +1870,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1835,20 +1879,44 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatconstant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatconstant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; XOP-LABEL: 'splatconstant_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX2-LABEL: 'splatconstant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
@@ -1856,27 +1924,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; XOP-LABEL: 'splatconstant_shift_v32i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1885,39 +1969,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
-; XOP-LABEL: 'splatconstant_shift_v64i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
+; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v64i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index 2acff1ca1bbc2..6ac984927df05 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -482,32 +482,50 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v2i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+; AVX1-LABEL: 'splatvar_shift_v2i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
-; XOP-LABEL: 'splatvar_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+; AVX2-LABEL: 'splatvar_shift_v2i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v2i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v2i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v2i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+;
+; BTVER2-LABEL: 'splatvar_shift_v2i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -519,19 +537,19 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
@@ -561,7 +579,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v4i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -574,19 +592,19 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
@@ -616,7 +634,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v8i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -629,13 +647,13 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
@@ -666,49 +684,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -721,49 +739,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -776,13 +794,13 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v8i16'
@@ -813,49 +831,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> undef, i16 %b, i32 0
@@ -868,49 +886,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v32i16'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> undef, i16 %b, i32 0
@@ -923,61 +959,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %shift = ashr <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512F-LABEL: 'splatvar_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512BW-LABEL: 'splatvar_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512VL-LABEL: 'splatvar_shift_v16i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512-LABEL: 'splatvar_shift_v16i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
@@ -990,25 +1014,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %shift = ashr <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %shift = ashr <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -1026,31 +1050,31 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; AVX512F-LABEL: 'splatvar_shift_v32i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatvar_shift_v32i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v32i8'
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <32 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> undef, i8 %b, i32 0
@@ -1063,25 +1087,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %shift = ashr <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %shift = ashr <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -1105,7 +1129,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512BW-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v64i8'
@@ -1117,13 +1141,13 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %shift = ashr <64 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> undef, i8 %b, i32 0
@@ -1594,15 +1618,15 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v2i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatconstant_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v2i64'
@@ -1615,27 +1639,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v4i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
-; XOP-LABEL: 'splatconstant_shift_v4i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v4i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1644,27 +1672,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
-; XOP-LABEL: 'splatconstant_shift_v8i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v8i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1686,19 +1718,19 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
@@ -1706,7 +1738,7 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v8i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1719,19 +1751,19 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
@@ -1739,7 +1771,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v16i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1761,27 +1793,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v16i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v16i16'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v16i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1794,23 +1838,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
@@ -1818,7 +1862,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16'
@@ -1826,7 +1870,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1835,20 +1879,44 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatconstant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatconstant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; XOP-LABEL: 'splatconstant_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX2-LABEL: 'splatconstant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v16i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v16i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
@@ -1856,27 +1924,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; XOP-LABEL: 'splatconstant_shift_v32i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1885,39 +1969,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
-; XOP-LABEL: 'splatconstant_shift_v64i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
+; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v64i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll
index a6d86e3837e00..1f087f9ecb353 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll
@@ -5,12 +5,12 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
 ;
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42
@@ -138,31 +138,31 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v2i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v2i64'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v2i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v2i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> undef, i64 %b, i32 0
@@ -175,49 +175,49 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v4i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -230,49 +230,49 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -285,32 +285,50 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v4i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v4i32'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; AVX1-LABEL: 'splatvar_shift_v4i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v4i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v4i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v4i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; BTVER2-LABEL: 'splatvar_shift_v4i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -322,49 +340,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -377,49 +395,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -432,32 +450,50 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v8i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v8i16'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+; AVX1-LABEL: 'splatvar_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; BTVER2-LABEL: 'splatvar_shift_v8i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %insert = insertelement <8 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -469,49 +505,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> undef, i16 %b, i32 0
@@ -524,49 +560,55 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> undef, i16 %b, i32 0
@@ -579,32 +621,56 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatvar_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512F-LABEL: 'splatvar_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatvar_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -616,25 +682,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -646,19 +712,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> undef, i8 %b, i32 0
@@ -671,25 +743,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -701,19 +773,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v64i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> undef, i8 %b, i32 0
@@ -839,108 +917,452 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ;
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v2i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v2i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v2i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX-LABEL: 'splatconstant_shift_v2i64'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; XOP-LABEL: 'splatconstant_shift_v2i64'
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v2i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = ashr <2 x i64> %a, <i64 7, i64 7>
   ret <2 x i64> %shift
 }
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v4i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v4i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v4i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v4i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v4i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v4i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v4i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %shift
 }
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
   ret <8 x i64> %shift
 }
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v4i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v4i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v4i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v4i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v4i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v4i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
   ret <4 x i32> %shift
 }
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i32> %shift
 }
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v16i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <16 x i32> %shift
 }
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i16'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <8 x i16> %shift
 }
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <16 x i16> %shift
 }
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v32i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <32 x i16> %shift
 }
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
 }
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v32i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v64i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v64i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v64i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v64i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v64i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v64i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll
index 4918e53e3fc7a..aa148e6ac71da 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll
@@ -5,12 +5,12 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
 ;
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42
@@ -138,31 +138,31 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v2i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v2i64'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v2i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v2i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> undef, i64 %b, i32 0
@@ -175,49 +175,49 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v4i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -230,49 +230,49 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -285,31 +285,31 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v4i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
+; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
@@ -322,49 +322,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -377,49 +377,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -432,31 +432,31 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v8i16'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v8i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
+; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %insert = insertelement <8 x i16> undef, i16 %b, i32 0
@@ -469,49 +469,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> undef, i16 %b, i32 0
@@ -524,49 +524,55 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> undef, i16 %b, i32 0
@@ -579,32 +585,44 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v16i8'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512F-LABEL: 'splatvar_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -616,25 +634,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -646,19 +664,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <32 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> undef, i8 %b, i32 0
@@ -671,25 +695,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -701,19 +725,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v64i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> undef, i8 %b, i32 0
@@ -839,27 +869,99 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ;
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v2i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v2i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v2i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX-LABEL: 'splatconstant_shift_v2i64'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; XOP-LABEL: 'splatconstant_shift_v2i64'
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v2i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = ashr <2 x i64> %a, <i64 7, i64 7>
   ret <2 x i64> %shift
 }
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v4i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v4i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v4i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v4i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v4i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v4i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v4i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %shift
 }
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
   ret <8 x i64> %shift
@@ -875,18 +977,74 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 }
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i32> %shift
 }
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v16i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <16 x i32> %shift
@@ -902,45 +1060,197 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 }
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <16 x i16> %shift
 }
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v32i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <32 x i16> %shift
 }
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX-LABEL: 'splatconstant_shift_v16i8'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
 }
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v32i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v64i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v64i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v64i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v64i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v64i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v64i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll
index 2c178e647a98d..04d44d5c8bd4f 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll
@@ -5,12 +5,12 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
 ;
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42
@@ -175,19 +175,19 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
@@ -199,7 +199,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
@@ -217,7 +217,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v4i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -230,37 +230,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i64'
@@ -272,7 +272,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v8i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -322,49 +322,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -377,49 +377,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -469,49 +469,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> undef, i16 %b, i32 0
@@ -524,49 +524,55 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> undef, i16 %b, i32 0
@@ -579,31 +585,37 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v16i8'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
@@ -616,25 +628,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -646,19 +658,19 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v32i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> undef, i8 %b, i32 0
@@ -671,25 +683,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -701,19 +713,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v64i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> undef, i8 %b, i32 0
@@ -848,18 +866,74 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 }
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v4i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v4i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v4i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v4i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v4i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v4i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v4i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %shift
 }
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
   ret <8 x i64> %shift
@@ -875,18 +949,74 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 }
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i32> %shift
 }
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v16i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <16 x i32> %shift
@@ -902,45 +1032,189 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 }
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <16 x i16> %shift
 }
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v32i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <32 x i16> %shift
 }
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX-LABEL: 'splatconstant_shift_v16i8'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v16i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
 }
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v32i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v32i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v64i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v64i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v64i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v64i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v64i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v64i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
index 5ce239377a7b6..1adad5e38be70 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
@@ -506,13 +506,13 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v2i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v2i64'
@@ -543,37 +543,37 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i64'
@@ -585,7 +585,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v4i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> poison, i64 %b, i32 0
@@ -598,37 +598,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i64'
@@ -640,7 +640,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v8i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> poison, i64 %b, i32 0
@@ -653,13 +653,13 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
@@ -690,49 +690,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> poison, i32 %b, i32 0
@@ -745,49 +745,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> poison, i32 %b, i32 0
@@ -800,13 +800,13 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v8i16'
@@ -837,49 +837,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> poison, i16 %b, i32 0
@@ -892,49 +892,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v32i16'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> poison, i16 %b, i32 0
@@ -947,61 +965,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512F-LABEL: 'splatvar_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512BW-LABEL: 'splatvar_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512VL-LABEL: 'splatvar_shift_v16i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512-LABEL: 'splatvar_shift_v16i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> poison, i8 %b, i32 0
@@ -1014,25 +1020,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -1044,37 +1050,19 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512F-LABEL: 'splatvar_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
-;
-; AVX512BW-LABEL: 'splatvar_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
-;
-; AVX512VL-LABEL: 'splatvar_shift_v32i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
-;
-; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512-LABEL: 'splatvar_shift_v32i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %shift = lshr <32 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> poison, i8 %b, i32 0
@@ -1087,25 +1075,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %shift = lshr <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = lshr <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -1117,7 +1105,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatvar_shift_v64i8'
@@ -1129,7 +1117,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512BW-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v64i8'
@@ -1141,13 +1129,13 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %shift = lshr <64 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> poison, i8 %b, i32 0
@@ -1651,19 +1639,19 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
@@ -1671,7 +1659,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v4i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1684,19 +1672,19 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
@@ -1704,7 +1692,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v8i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1726,19 +1714,19 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
@@ -1746,7 +1734,7 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v8i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1759,19 +1747,19 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
@@ -1779,7 +1767,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v16i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1801,27 +1789,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v16i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v16i16'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v16i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1834,23 +1834,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
@@ -1858,7 +1858,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16'
@@ -1866,7 +1866,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1875,20 +1875,28 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatconstant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatconstant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
@@ -1896,11 +1904,11 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
@@ -1915,12 +1923,24 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1929,11 +1949,11 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
@@ -1949,23 +1969,23 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v64i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index 7c545bbd4efee..a07d5054189f1 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -506,13 +506,13 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v2i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v2i64'
@@ -543,37 +543,37 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i64'
@@ -585,7 +585,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v4i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -598,37 +598,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i64'
@@ -640,7 +640,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v8i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -653,13 +653,13 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
@@ -690,49 +690,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -745,49 +745,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -800,13 +800,13 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v8i16'
@@ -837,49 +837,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> undef, i16 %b, i32 0
@@ -892,49 +892,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v32i16'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> undef, i16 %b, i32 0
@@ -947,61 +965,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512F-LABEL: 'splatvar_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512BW-LABEL: 'splatvar_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512VL-LABEL: 'splatvar_shift_v16i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512-LABEL: 'splatvar_shift_v16i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
@@ -1014,25 +1020,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -1044,37 +1050,19 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512F-LABEL: 'splatvar_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
-;
-; AVX512BW-LABEL: 'splatvar_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
-;
-; AVX512VL-LABEL: 'splatvar_shift_v32i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
-;
-; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512-LABEL: 'splatvar_shift_v32i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %shift = lshr <32 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> undef, i8 %b, i32 0
@@ -1087,25 +1075,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %shift = lshr <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = lshr <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -1117,7 +1105,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatvar_shift_v64i8'
@@ -1129,7 +1117,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512BW-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v64i8'
@@ -1141,13 +1129,13 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %shift = lshr <64 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> undef, i8 %b, i32 0
@@ -1651,19 +1639,19 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
@@ -1671,7 +1659,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v4i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1684,19 +1672,19 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
@@ -1704,7 +1692,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v8i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1726,19 +1714,19 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
@@ -1746,7 +1734,7 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v8i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1759,19 +1747,19 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
@@ -1779,7 +1767,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v16i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1801,27 +1789,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v16i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v16i16'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v16i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1834,23 +1834,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
@@ -1858,7 +1858,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16'
@@ -1866,7 +1866,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1875,20 +1875,28 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatconstant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatconstant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
@@ -1896,11 +1904,11 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
@@ -1915,12 +1923,24 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1929,11 +1949,11 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
@@ -1949,23 +1969,23 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v64i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll
index 7cacda164189e..6e73f62d6d1a9 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll
@@ -1,20 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOPAVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
 ;
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX1
 
 ; Verify the cost of vector logical shift right instructions.
 
@@ -138,31 +138,43 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v2i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+; AVX1-LABEL: 'splatvar_shift_v2i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <2 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v2i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v2i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <2 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v2i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v2i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> undef, i64 %b, i32 0
@@ -175,50 +187,44 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
-;
-; BTVER2-LABEL: 'splatvar_shift_v4i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -230,50 +236,44 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
-;
-; BTVER2-LABEL: 'splatvar_shift_v8i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -285,31 +285,43 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v4i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v4i32'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; AVX1-LABEL: 'splatvar_shift_v4i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v4i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v4i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v4i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
@@ -322,50 +334,44 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
-;
-; BTVER2-LABEL: 'splatvar_shift_v8i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -377,50 +383,44 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
-;
-; BTVER2-LABEL: 'splatvar_shift_v16i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -432,31 +432,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v8i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v8i16'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+; AVX1-LABEL: 'splatvar_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %insert = insertelement <8 x i16> undef, i16 %b, i32 0
@@ -469,50 +481,44 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
-;
-; BTVER2-LABEL: 'splatvar_shift_v16i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -524,50 +530,50 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; BTVER2-LABEL: 'splatvar_shift_v32i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
@@ -579,32 +585,50 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatvar_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512F-LABEL: 'splatvar_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -616,25 +640,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -646,20 +670,14 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v32i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
-;
-; BTVER2-LABEL: 'splatvar_shift_v32i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -671,25 +689,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -701,20 +719,20 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v64i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
-; BTVER2-LABEL: 'splatvar_shift_v64i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; AVX512BW-LABEL: 'splatvar_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
@@ -839,108 +857,412 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ;
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v2i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v2i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v2i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v2i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v2i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v2i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v2i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v2i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = lshr <2 x i64> %a, <i64 7, i64 7>
   ret <2 x i64> %shift
 }
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v4i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v4i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v4i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v4i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v4i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v4i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %shift
 }
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
   ret <8 x i64> %shift
 }
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v4i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v4i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v4i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v4i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v4i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
   ret <4 x i32> %shift
 }
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i32> %shift
 }
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v16i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <16 x i32> %shift
 }
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i16'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <8 x i16> %shift
 }
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <16 x i16> %shift
 }
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <32 x i16> %shift
 }
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
 }
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v32i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v64i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v64i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v64i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v64i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v64i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll
index 20e445837b753..c92d537e0eadf 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll
@@ -5,12 +5,12 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
 ;
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42
@@ -138,31 +138,31 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v2i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v2i64'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v2i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
+; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v2i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> undef, i64 %b, i32 0
@@ -175,49 +175,49 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v4i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -230,49 +230,49 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -285,31 +285,31 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v4i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
+; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
@@ -322,49 +322,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -377,49 +377,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -432,31 +432,31 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v8i16'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v8i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
+; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %insert = insertelement <8 x i16> undef, i16 %b, i32 0
@@ -469,49 +469,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> undef, i16 %b, i32 0
@@ -524,49 +524,55 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> undef, i16 %b, i32 0
@@ -579,31 +585,37 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v16i8'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
@@ -616,25 +628,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -646,19 +658,19 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v32i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> undef, i8 %b, i32 0
@@ -671,25 +683,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -701,19 +713,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v64i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> undef, i8 %b, i32 0
@@ -848,18 +866,74 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 }
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v4i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v4i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v4i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v4i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v4i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v4i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v4i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %shift
 }
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
   ret <8 x i64> %shift
@@ -875,18 +949,74 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 }
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i32> %shift
 }
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v16i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <16 x i32> %shift
@@ -902,45 +1032,193 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 }
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <16 x i16> %shift
 }
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v32i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <32 x i16> %shift
 }
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX-LABEL: 'splatconstant_shift_v16i8'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v16i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
 }
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v32i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v64i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v64i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v64i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v64i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v64i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v64i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll
index 8c1904b339334..e21a384c8e0dd 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll
@@ -5,12 +5,12 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
 ;
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42
@@ -175,19 +175,19 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
@@ -199,7 +199,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
@@ -217,7 +217,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v4i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -230,37 +230,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i64'
@@ -272,7 +272,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; BTVER2-LABEL: 'splatvar_shift_v8i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -322,49 +322,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -377,49 +377,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -469,49 +469,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> undef, i16 %b, i32 0
@@ -524,49 +524,55 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> undef, i16 %b, i32 0
@@ -579,32 +585,50 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatvar_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i8> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatvar_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -616,25 +640,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -646,19 +670,19 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v32i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> undef, i8 %b, i32 0
@@ -671,25 +695,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -701,19 +725,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v64i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> undef, i8 %b, i32 0
@@ -976,18 +1006,74 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 }
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v4i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v4i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v4i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v4i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v4i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v4i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v4i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %shift
 }
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
   ret <8 x i64> %shift
@@ -1003,18 +1089,74 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 }
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i32> %shift
 }
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v16i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <16 x i32> %shift
@@ -1030,9 +1172,41 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 }
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <16 x i16> %shift
@@ -1040,15 +1214,15 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
@@ -1056,19 +1230,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1076,27 +1254,107 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 }
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX-LABEL: 'splatconstant_shift_v16i8'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v16i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
 }
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v32i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v32i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v64i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v64i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v64i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v64i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v64i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v64i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
index da1019ba0934f..b9dfbc0d474fc 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
@@ -538,13 +538,13 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v2i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v2i64'
@@ -568,7 +568,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SLM-LABEL: 'splatvar_shift_v2i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> poison, i64 %b, i32 0
@@ -581,37 +581,37 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i64'
@@ -623,13 +623,13 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SLM-LABEL: 'splatvar_shift_v4i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v4i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> poison, i64 %b, i32 0
@@ -642,37 +642,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i64'
@@ -684,13 +684,13 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SLM-LABEL: 'splatvar_shift_v8i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> poison, i64 %b, i32 0
@@ -703,13 +703,13 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
@@ -733,7 +733,7 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SLM-LABEL: 'splatvar_shift_v4i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> poison, i32 %b, i32 0
@@ -746,55 +746,55 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v8i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> poison, i32 %b, i32 0
@@ -807,55 +807,55 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v16i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> poison, i32 %b, i32 0
@@ -868,13 +868,13 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v8i16'
@@ -898,7 +898,7 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SLM-LABEL: 'splatvar_shift_v8i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %insert = insertelement <8 x i16> poison, i16 %b, i32 0
@@ -911,55 +911,55 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v16i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> poison, i16 %b, i32 0
@@ -972,55 +972,73 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v32i16'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v32i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> poison, i16 %b, i32 0
@@ -1033,67 +1051,55 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = shl <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512F-LABEL: 'splatvar_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512BW-LABEL: 'splatvar_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512VL-LABEL: 'splatvar_shift_v16i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512-LABEL: 'splatvar_shift_v16i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v16i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> poison, i8 %b, i32 0
@@ -1106,25 +1112,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = shl <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -1139,40 +1145,22 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512F-LABEL: 'splatvar_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
-;
-; AVX512BW-LABEL: 'splatvar_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
-;
-; AVX512VL-LABEL: 'splatvar_shift_v32i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
-;
-; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512-LABEL: 'splatvar_shift_v32i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v32i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <32 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> poison, i8 %b, i32 0
@@ -1185,25 +1173,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %shift = shl <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -1227,7 +1215,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512BW-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v64i8'
@@ -1239,19 +1227,19 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v64i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = shl <64 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> poison, i8 %b, i32 0
@@ -1771,19 +1759,19 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
@@ -1791,7 +1779,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v4i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1804,19 +1792,19 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
@@ -1824,7 +1812,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v8i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1846,19 +1834,19 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
@@ -1866,7 +1854,7 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v8i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1879,19 +1867,19 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
@@ -1899,7 +1887,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v16i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1921,27 +1909,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v16i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v16i16'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v16i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1954,23 +1954,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
@@ -1978,7 +1978,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16'
@@ -1986,7 +1986,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1995,20 +1995,28 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatconstant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatconstant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
@@ -2016,11 +2024,11 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
@@ -2035,12 +2043,24 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -2049,11 +2069,11 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
@@ -2069,23 +2089,23 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v64i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index 98202fd018fea..5c72d51136d8e 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -538,13 +538,13 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v2i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v2i64'
@@ -568,7 +568,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SLM-LABEL: 'splatvar_shift_v2i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> undef, i64 %b, i32 0
@@ -581,37 +581,37 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i64'
@@ -623,13 +623,13 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SLM-LABEL: 'splatvar_shift_v4i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v4i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -642,37 +642,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i64'
@@ -684,13 +684,13 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SLM-LABEL: 'splatvar_shift_v8i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -703,13 +703,13 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
@@ -733,7 +733,7 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SLM-LABEL: 'splatvar_shift_v4i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
@@ -746,55 +746,55 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v8i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -807,55 +807,55 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v16i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -868,13 +868,13 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v8i16'
@@ -898,7 +898,7 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SLM-LABEL: 'splatvar_shift_v8i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %insert = insertelement <8 x i16> undef, i16 %b, i32 0
@@ -911,55 +911,55 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v16i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> undef, i16 %b, i32 0
@@ -972,55 +972,73 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512VL-LABEL: 'splatvar_shift_v32i16'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
+;
+; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v32i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> undef, i16 %b, i32 0
@@ -1033,67 +1051,55 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = shl <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512F-LABEL: 'splatvar_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512BW-LABEL: 'splatvar_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512VL-LABEL: 'splatvar_shift_v16i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
-;
-; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX512-LABEL: 'splatvar_shift_v16i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v16i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
@@ -1106,25 +1112,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = shl <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -1139,40 +1145,22 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512F-LABEL: 'splatvar_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
-;
-; AVX512BW-LABEL: 'splatvar_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
-;
-; AVX512VL-LABEL: 'splatvar_shift_v32i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
-;
-; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512-LABEL: 'splatvar_shift_v32i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v32i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <32 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> undef, i8 %b, i32 0
@@ -1185,25 +1173,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %shift = shl <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -1227,7 +1215,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512BW-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatvar_shift_v64i8'
@@ -1239,19 +1227,19 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v64i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = shl <64 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> undef, i8 %b, i32 0
@@ -1771,19 +1759,19 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
@@ -1791,7 +1779,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v4i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1804,19 +1792,19 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
@@ -1824,7 +1812,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v8i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1846,19 +1834,19 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
@@ -1866,7 +1854,7 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v8i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1879,19 +1867,19 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
@@ -1899,7 +1887,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v16i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1921,27 +1909,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v16i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v16i16'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v16i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1954,23 +1954,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
@@ -1978,7 +1978,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16'
@@ -1986,7 +1986,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1995,20 +1995,28 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatconstant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatconstant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v16i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
@@ -2016,11 +2024,11 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
@@ -2035,12 +2043,24 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512VL-LABEL: 'splatconstant_shift_v32i8'
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
+;
+; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -2049,11 +2069,11 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
@@ -2069,23 +2089,23 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v64i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll
index 3d3cd00ed6753..185c4161270d0 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll
@@ -1,20 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOPAVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
 ;
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX1
 
 ; Verify the cost of vector logical shift right instructions.
 
@@ -138,31 +138,43 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v2i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+; AVX1-LABEL: 'splatvar_shift_v2i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <2 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v2i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v2i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <2 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v2i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v2i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> undef, i64 %b, i32 0
@@ -175,50 +187,44 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <4 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
-;
-; BTVER2-LABEL: 'splatvar_shift_v4i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -230,50 +236,44 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
-;
-; BTVER2-LABEL: 'splatvar_shift_v8i64'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -285,31 +285,43 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v4i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; AVX1-LABEL: 'splatvar_shift_v4i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v4i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
-; XOP-LABEL: 'splatvar_shift_v4i32'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; XOPAVX1-LABEL: 'splatvar_shift_v4i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v4i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
@@ -322,50 +334,44 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
-;
-; BTVER2-LABEL: 'splatvar_shift_v8i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -377,50 +383,44 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
-;
-; BTVER2-LABEL: 'splatvar_shift_v16i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -432,31 +432,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v8i16'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v8i16'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+; AVX1-LABEL: 'splatvar_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %insert = insertelement <8 x i16> undef, i16 %b, i32 0
@@ -469,50 +481,44 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
-;
-; BTVER2-LABEL: 'splatvar_shift_v16i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -524,50 +530,50 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; BTVER2-LABEL: 'splatvar_shift_v32i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
@@ -579,31 +585,43 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatvar_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i8> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
@@ -616,25 +634,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -646,20 +664,14 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v32i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
-;
-; BTVER2-LABEL: 'splatvar_shift_v32i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -671,25 +683,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -701,20 +713,20 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v64i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
-; BTVER2-LABEL: 'splatvar_shift_v64i8'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; AVX512BW-LABEL: 'splatvar_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
@@ -781,10 +793,6 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
 ; AVX512-LABEL: 'constant_shift_v4i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; BTVER2-LABEL: 'constant_shift_v4i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
   ret <4 x i32> %shift
@@ -818,10 +826,6 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; AVX512-LABEL: 'constant_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
-;
-; BTVER2-LABEL: 'constant_shift_v8i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
   ret <8 x i32> %shift
@@ -855,10 +859,6 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
 ; AVX512-LABEL: 'constant_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
-;
-; BTVER2-LABEL: 'constant_shift_v16i32'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
   ret <16 x i32> %shift
@@ -901,10 +901,6 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; AVX512-LABEL: 'constant_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
-;
-; BTVER2-LABEL: 'constant_shift_v16i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
   ret <16 x i16> %shift
@@ -935,9 +931,13 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; BTVER2-LABEL: 'constant_shift_v32i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'constant_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'constant_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
   ret <32 x i16> %shift
@@ -975,63 +975,231 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 ;
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v2i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v2i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v2i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v2i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v2i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v2i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v2i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v2i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = shl <2 x i64> %a, <i64 7, i64 7>
   ret <2 x i64> %shift
 }
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v4i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v4i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v4i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v4i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v4i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v4i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %shift
 }
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i64'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
   ret <8 x i64> %shift
 }
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v4i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v4i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v4i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v4i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v4i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
   ret <4 x i32> %shift
 }
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i32> %shift
 }
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v16i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <16 x i32> %shift
 }
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+; SSE2-LABEL: 'splatconstant_shift_v8i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v8i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i16'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <8 x i16> %shift
@@ -1039,36 +1207,36 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v16i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
-; BTVER2-LABEL: 'splatconstant_shift_v16i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <16 x i16> %shift
@@ -1076,59 +1244,143 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; BTVER2-LABEL: 'splatconstant_shift_v32i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <32 x i16> %shift
 }
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v16i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v16i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
 }
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v32i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v32i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v32i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v64i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; SSE2-LABEL: 'splatconstant_shift_v64i8'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; SSE42-LABEL: 'splatconstant_shift_v64i8'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v64i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v64i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll
index ff6c245124c13..a8ef2641bced1 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll
@@ -5,12 +5,12 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
 ;
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
@@ -138,37 +138,37 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v2i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v2i64'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v2i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v2i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v2i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> undef, i64 %b, i32 0
@@ -181,55 +181,55 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v4i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v4i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v4i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -242,55 +242,55 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v8i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i64'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -303,37 +303,37 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v4i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v4i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v4i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v4i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
@@ -346,55 +346,55 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v8i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <8 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v8i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v8i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <8 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -407,55 +407,55 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <16 x i32> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v16i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i32'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <16 x i32> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -468,37 +468,37 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v8i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v8i16'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; XOP-LABEL: 'splatvar_shift_v8i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v8i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v8i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %insert = insertelement <8 x i16> undef, i16 %b, i32 0
@@ -511,55 +511,55 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v16i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v16i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v16i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %insert = insertelement <16 x i16> undef, i16 %b, i32 0
@@ -572,55 +572,61 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %splat
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatvar_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <32 x i16> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v32i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %insert = insertelement <32 x i16> undef, i16 %b, i32 0
@@ -633,38 +639,56 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v16i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
-; AVX-LABEL: 'splatvar_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; AVX1-LABEL: 'splatvar_shift_v16i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX2-LABEL: 'splatvar_shift_v16i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v16i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v16i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; BTVER2-LABEL: 'splatvar_shift_v16i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %insert = insertelement <16 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -676,25 +700,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v32i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v32i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v32i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
@@ -706,25 +730,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatvar_shift_v32i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v32i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v32i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %insert = insertelement <32 x i8> undef, i8 %b, i32 0
@@ -737,25 +761,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatvar_shift_v64i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v64i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatvar_shift_v64i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, %splat
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
@@ -767,25 +791,31 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, %splat
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
-; AVX512-LABEL: 'splatvar_shift_v64i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; AVX512F-LABEL: 'splatvar_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatvar_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, %splat
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SLM-LABEL: 'splatvar_shift_v64i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; SLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; BTVER2-LABEL: 'splatvar_shift_v64i8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, %splat
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %insert = insertelement <64 x i8> undef, i8 %b, i32 0
@@ -1064,18 +1094,66 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 }
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v4i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+; SSE-LABEL: 'splatconstant_shift_v4i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v4i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v4i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v4i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v4i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %shift
 }
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+; SSE-LABEL: 'splatconstant_shift_v8i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i64'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i64'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i64'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
   ret <8 x i64> %shift
@@ -1091,18 +1169,66 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 }
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v8i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+; SSE-LABEL: 'splatconstant_shift_v8i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v8i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v8i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v8i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v8i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i32> %shift
 }
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+; SSE-LABEL: 'splatconstant_shift_v16i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i32'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i32'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v16i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i32'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <16 x i32> %shift
@@ -1118,9 +1244,37 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 }
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+; SSE-LABEL: 'splatconstant_shift_v16i16'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v16i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v16i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v16i16'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <16 x i16> %shift
@@ -1128,31 +1282,35 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
-; AVX512-LABEL: 'splatconstant_shift_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+; AVX512F-LABEL: 'splatconstant_shift_v32i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; BTVER2-LABEL: 'splatconstant_shift_v32i16'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1160,27 +1318,99 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 }
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v16i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+; SSE-LABEL: 'splatconstant_shift_v16i8'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX-LABEL: 'splatconstant_shift_v16i8'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
+;
+; AVX512-LABEL: 'splatconstant_shift_v16i8'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
 }
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v32i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+; SSE-LABEL: 'splatconstant_shift_v32i8'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v32i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v32i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v32i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
-; CHECK-LABEL: 'splatconstant_shift_v64i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+; SSE-LABEL: 'splatconstant_shift_v64i8'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX1-LABEL: 'splatconstant_shift_v64i8'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX2-LABEL: 'splatconstant_shift_v64i8'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512F-LABEL: 'splatconstant_shift_v64i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
+;
+; BTVER2-LABEL: 'splatconstant_shift_v64i8'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
index 79426a9285286..5af7afc1c8dc1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
@@ -3,8 +3,8 @@
 
 ; CHECK: 'foo'
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %shift = ashr i32 %val, %k
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %shift = ashr i32 %val, %k
-; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %shift = ashr i32 %val, %k
+; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %shift = ashr i32 %val, %k
+; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %shift = ashr i32 %val, %k
 define void @foo(i32* nocapture %p, i32 %k) local_unnamed_addr #0 {
 entry:
   br label %body


        


More information about the llvm-commits mailing list