[llvm] 62fca69 - [CostModel][X86][AVX2] Improve 256-bit vector non-uniform shifts costs

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu May 20 04:22:21 PDT 2021


Author: Simon Pilgrim
Date: 2021-05-20T12:16:16+01:00
New Revision: 62fca69a704cc4883736698b781f63f59e7fa7b2

URL: https://github.com/llvm/llvm-project/commit/62fca69a704cc4883736698b781f63f59e7fa7b2
DIFF: https://github.com/llvm/llvm-project/commit/62fca69a704cc4883736698b781f63f59e7fa7b2.diff

LOG: [CostModel][X86][AVX2] Improve 256-bit vector non-uniform shifts costs

Haswell, Excavator and early Ryzen all have slower 256-bit non-uniform vector shifts (confirmed on AMDSoG/Agner/instlatx64 and llvm models) - so bump the worst case costs accordingly.

Noticed while investigating PR50364

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/div.ll
    llvm/test/Analysis/CostModel/X86/fshl.ll
    llvm/test/Analysis/CostModel/X86/fshr.ll
    llvm/test/Analysis/CostModel/X86/rem.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index fbb937fdee18c..4b8d245b3756c 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -517,6 +517,12 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
     { ISD::SHL,  MVT::v32i16, 2 }, // 2*psllw.
     { ISD::SRL,  MVT::v32i16, 2 }, // 2*psrlw.
     { ISD::SRA,  MVT::v32i16, 2 }, // 2*psraw.
+
+    { ISD::SHL,  MVT::v8i32,  1 }, // pslld
+    { ISD::SRL,  MVT::v8i32,  1 }, // psrld
+    { ISD::SRA,  MVT::v8i32,  1 }, // psrad
+    { ISD::SHL,  MVT::v4i64,  1 }, // psllq
+    { ISD::SRL,  MVT::v4i64,  1 }, // psrlq
   };
 
   if (ST->hasAVX2() &&
@@ -576,10 +582,15 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
       return LT.first * Entry->Cost;
 
   static const CostTblEntry AVX512CostTable[] = {
+    { ISD::SHL,     MVT::v8i32,      1 },
+    { ISD::SRL,     MVT::v8i32,      1 },
+    { ISD::SRA,     MVT::v8i32,      1 },
     { ISD::SHL,     MVT::v16i32,     1 },
     { ISD::SRL,     MVT::v16i32,     1 },
     { ISD::SRA,     MVT::v16i32,     1 },
 
+    { ISD::SHL,     MVT::v4i64,      1 },
+    { ISD::SRL,     MVT::v4i64,      1 },
     { ISD::SHL,     MVT::v8i64,      1 },
     { ISD::SRL,     MVT::v8i64,      1 },
 
@@ -614,13 +625,13 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
     { ISD::SHL,     MVT::v4i32,    1 },
     { ISD::SRL,     MVT::v4i32,    1 },
     { ISD::SRA,     MVT::v4i32,    1 },
-    { ISD::SHL,     MVT::v8i32,    1 },
-    { ISD::SRL,     MVT::v8i32,    1 },
-    { ISD::SRA,     MVT::v8i32,    1 },
+    { ISD::SHL,     MVT::v8i32,    2 }, // vpsllvd (Haswell from agner.org)
+    { ISD::SRL,     MVT::v8i32,    2 }, // vpsrlvd (Haswell from agner.org)
+    { ISD::SRA,     MVT::v8i32,    2 }, // vpsravd (Haswell from agner.org)
     { ISD::SHL,     MVT::v2i64,    1 },
     { ISD::SRL,     MVT::v2i64,    1 },
-    { ISD::SHL,     MVT::v4i64,    1 },
-    { ISD::SRL,     MVT::v4i64,    1 },
+    { ISD::SHL,     MVT::v4i64,    2 }, // vpsllvq (Haswell from agner.org)
+    { ISD::SRL,     MVT::v4i64,    2 }, // vpsrlvq (Haswell from agner.org)
   };
 
   if (ST->hasAVX512()) {

diff  --git a/llvm/test/Analysis/CostModel/X86/div.ll b/llvm/test/Analysis/CostModel/X86/div.ll
index 98a1d2380df9b..14f809d0634a3 100644
--- a/llvm/test/Analysis/CostModel/X86/div.ll
+++ b/llvm/test/Analysis/CostModel/X86/div.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
 ;
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SLM
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2
 
 define i32 @sdiv() {
@@ -388,25 +388,6 @@ define i32 @udiv_const() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; SLM-LABEL: 'udiv_const'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 6, i64 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
 ; BTVER2-LABEL: 'udiv_const'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 6, i64 7>
@@ -545,25 +526,6 @@ define i32 @sdiv_uniformconst() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; SLM-LABEL: 'sdiv_uniformconst'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
 ; BTVER2-LABEL: 'sdiv_uniformconst'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
@@ -702,25 +664,6 @@ define i32 @udiv_uniformconst() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; SLM-LABEL: 'udiv_uniformconst'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
 ; BTVER2-LABEL: 'udiv_uniformconst'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
@@ -843,12 +786,12 @@ define i32 @sdiv_constpow2() {
 ; AVX2-LABEL: 'sdiv_constpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 8, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
@@ -1038,12 +981,12 @@ define i32 @udiv_constpow2() {
 ; AVX2-LABEL: 'udiv_constpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 8, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
@@ -1154,24 +1097,62 @@ define i32 @udiv_constpow2() {
 }
 
 define i32 @sdiv_uniformconstpow2() {
-; SSE-LABEL: 'sdiv_uniformconstpow2'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE2-LABEL: 'sdiv_uniformconstpow2'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSSE3-LABEL: 'sdiv_uniformconstpow2'
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'sdiv_uniformconstpow2'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'sdiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
@@ -1406,25 +1387,6 @@ define i32 @udiv_uniformconstpow2() {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; SLM-LABEL: 'udiv_uniformconstpow2'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
 ; BTVER2-LABEL: 'udiv_uniformconstpow2'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>

diff  --git a/llvm/test/Analysis/CostModel/X86/fshl.ll b/llvm/test/Analysis/CostModel/X86/fshl.ll
index 0d7954e4f03da..1fb944e497bed 100644
--- a/llvm/test/Analysis/CostModel/X86/fshl.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshl.ll
@@ -37,8 +37,8 @@ define void @var_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64
 ; AVX2-LABEL: 'var_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 %c64)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'var_funnel_i64'
@@ -101,8 +101,8 @@ define void @var_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i3
 ; AVX2-LABEL: 'var_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 %c32)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'var_funnel_i32'
@@ -693,8 +693,8 @@ define void @constant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; AVX2-LABEL: 'constant_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 1, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 1, i64 7, i64 15, i64 31>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 1, i64 7, i64 15, i64 31>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'constant_funnel_i64'
@@ -757,8 +757,8 @@ define void @constant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX2-LABEL: 'constant_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 4, i32 5, i32 6, i32 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'constant_funnel_i32'
@@ -1241,8 +1241,8 @@ define void @var_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64
 ; AVX2-LABEL: 'var_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 %c64)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'var_rotate_i64'
@@ -1305,8 +1305,8 @@ define void @var_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i3
 ; AVX2-LABEL: 'var_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 %c32)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'var_rotate_i32'
@@ -1863,8 +1863,8 @@ define void @constant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; AVX2-LABEL: 'constant_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 1, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 1, i64 7, i64 15, i64 31>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 1, i64 7, i64 15, i64 31>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'constant_rotate_i64'
@@ -1927,8 +1927,8 @@ define void @constant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX2-LABEL: 'constant_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 4, i32 5, i32 6, i32 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'constant_rotate_i32'

diff  --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll
index 2bde39423ae4b..66c2eb41ef263 100644
--- a/llvm/test/Analysis/CostModel/X86/fshr.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshr.ll
@@ -37,8 +37,8 @@ define void @var_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64
 ; AVX2-LABEL: 'var_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'var_funnel_i64'
@@ -101,8 +101,8 @@ define void @var_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i3
 ; AVX2-LABEL: 'var_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 %c32)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'var_funnel_i32'
@@ -693,8 +693,8 @@ define void @constant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; AVX2-LABEL: 'constant_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 1, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 1, i64 7, i64 15, i64 31>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 1, i64 7, i64 15, i64 31>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'constant_funnel_i64'
@@ -757,8 +757,8 @@ define void @constant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX2-LABEL: 'constant_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 4, i32 5, i32 6, i32 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'constant_funnel_i32'
@@ -1242,8 +1242,8 @@ define void @var_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64
 ; AVX2-LABEL: 'var_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 %c64)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'var_rotate_i64'
@@ -1306,8 +1306,8 @@ define void @var_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i3
 ; AVX2-LABEL: 'var_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 %c32)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %c128)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'var_rotate_i32'
@@ -1864,8 +1864,8 @@ define void @constant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; AVX2-LABEL: 'constant_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 1, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 1, i64 7, i64 15, i64 31>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 1, i64 7, i64 15, i64 31>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'constant_rotate_i64'
@@ -1928,8 +1928,8 @@ define void @constant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 ; AVX2-LABEL: 'constant_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 4, i32 5, i32 6, i32 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'constant_rotate_i32'

diff  --git a/llvm/test/Analysis/CostModel/X86/rem.ll b/llvm/test/Analysis/CostModel/X86/rem.ll
index 30dd9a7a4f13f..3f21ca8af4e18 100644
--- a/llvm/test/Analysis/CostModel/X86/rem.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem.ll
@@ -805,12 +805,12 @@ define i32 @srem_constpow2() {
 ; AVX2-LABEL: 'srem_constpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, <i64 8, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = srem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V8i64 = srem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = srem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = srem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = srem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = srem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V8i16 = srem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
index f12a52b32c75e..067aaba448bc4 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
@@ -160,7 +160,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v8i32'
@@ -168,7 +168,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v8i32'
@@ -197,7 +197,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v16i32'
@@ -205,7 +205,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v16i32'
@@ -1216,7 +1216,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v8i32'
@@ -1224,7 +1224,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v8i32'
@@ -1253,7 +1253,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v16i32'
@@ -1261,7 +1261,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v16i32'

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index 8b8d0bfd4de3b..24abcc7811193 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -160,7 +160,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v8i32'
@@ -168,7 +168,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v8i32'
@@ -197,7 +197,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v16i32'
@@ -205,7 +205,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v16i32'
@@ -1216,7 +1216,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v8i32'
@@ -1224,7 +1224,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v8i32'
@@ -1253,7 +1253,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v16i32'
@@ -1261,7 +1261,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v16i32'

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
index 817d39a7008d5..e2b07affe20eb 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
@@ -65,7 +65,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v4i64'
@@ -73,7 +73,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v4i64'
@@ -98,7 +98,7 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v8i64'
@@ -106,7 +106,7 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v8i64'
@@ -172,7 +172,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v8i32'
@@ -180,7 +180,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v8i32'
@@ -209,7 +209,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v16i32'
@@ -217,7 +217,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v16i32'
@@ -1133,7 +1133,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v4i64'
@@ -1141,7 +1141,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v4i64'
@@ -1166,7 +1166,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v8i64'
@@ -1174,7 +1174,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v8i64'
@@ -1236,7 +1236,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v8i32'
@@ -1244,7 +1244,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v8i32'
@@ -1273,7 +1273,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v16i32'
@@ -1281,7 +1281,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v16i32'

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index d474b99e987ae..e60fcad119be4 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -65,7 +65,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v4i64'
@@ -73,7 +73,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v4i64'
@@ -98,7 +98,7 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v8i64'
@@ -106,7 +106,7 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v8i64'
@@ -172,7 +172,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v8i32'
@@ -180,7 +180,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v8i32'
@@ -209,7 +209,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v16i32'
@@ -217,7 +217,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v16i32'
@@ -1133,7 +1133,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v4i64'
@@ -1141,7 +1141,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v4i64'
@@ -1166,7 +1166,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v8i64'
@@ -1174,7 +1174,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v8i64'
@@ -1236,7 +1236,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v8i32'
@@ -1244,7 +1244,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v8i32'
@@ -1273,7 +1273,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v16i32'
@@ -1281,7 +1281,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v16i32'

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
index 8dc84fa10aec5..3d4e8ccf408f3 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
@@ -61,7 +61,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v4i64'
@@ -69,7 +69,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v4i64'
@@ -94,7 +94,7 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v8i64'
@@ -102,7 +102,7 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v8i64'
@@ -164,7 +164,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v8i32'
@@ -172,7 +172,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v8i32'
@@ -201,7 +201,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v16i32'
@@ -209,7 +209,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v16i32'
@@ -1125,7 +1125,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v4i64'
@@ -1133,7 +1133,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v4i64'
@@ -1158,7 +1158,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v8i64'
@@ -1166,7 +1166,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v8i64'
@@ -1228,7 +1228,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v8i32'
@@ -1236,7 +1236,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v8i32'
@@ -1265,7 +1265,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v16i32'
@@ -1273,7 +1273,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v16i32'
@@ -2011,7 +2011,7 @@ define <8 x i32> @test7(<8 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl
 ;
 ; AVX2-LABEL: 'test7'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl
 ;
 ; XOPAVX1-LABEL: 'test7'
@@ -2019,7 +2019,7 @@ define <8 x i32> @test7(<8 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl
 ;
 ; XOPAVX2-LABEL: 'test7'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl
 ;
 ; AVX512-LABEL: 'test7'
@@ -2049,7 +2049,7 @@ define <4 x i64> @test8(<4 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl
 ;
 ; AVX2-LABEL: 'test8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl
 ;
 ; XOPAVX1-LABEL: 'test8'
@@ -2057,7 +2057,7 @@ define <4 x i64> @test8(<4 x i64> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl
 ;
 ; XOPAVX2-LABEL: 'test8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl
 ;
 ; AVX512-LABEL: 'test8'
@@ -2135,7 +2135,7 @@ define <16 x i32> @test10(<16 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl
 ;
 ; AVX2-LABEL: 'test10'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl
 ;
 ; XOPAVX1-LABEL: 'test10'
@@ -2143,7 +2143,7 @@ define <16 x i32> @test10(<16 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl
 ;
 ; XOPAVX2-LABEL: 'test10'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl
 ;
 ; AVX512-LABEL: 'test10'
@@ -2173,7 +2173,7 @@ define <8 x i64> @test11(<8 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl
 ;
 ; AVX2-LABEL: 'test11'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl
 ;
 ; XOPAVX1-LABEL: 'test11'
@@ -2181,7 +2181,7 @@ define <8 x i64> @test11(<8 x i64> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl
 ;
 ; XOPAVX2-LABEL: 'test11'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl
 ;
 ; AVX512-LABEL: 'test11'

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index 145c7e71efa11..a5cf9ca3537f4 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -61,7 +61,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v4i64'
@@ -69,7 +69,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v4i64'
@@ -94,7 +94,7 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v8i64'
@@ -102,7 +102,7 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v8i64'
@@ -164,7 +164,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v8i32'
@@ -172,7 +172,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v8i32'
@@ -201,7 +201,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'var_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %b
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %b
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'var_shift_v16i32'
@@ -209,7 +209,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'var_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %b
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %b
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'var_shift_v16i32'
@@ -1125,7 +1125,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v4i64'
@@ -1133,7 +1133,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v4i64'
@@ -1158,7 +1158,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v8i64'
@@ -1166,7 +1166,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v8i64'
@@ -1228,7 +1228,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v8i32'
@@ -1236,7 +1236,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v8i32'
@@ -1265,7 +1265,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'constant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'constant_shift_v16i32'
@@ -1273,7 +1273,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'constant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'constant_shift_v16i32'
@@ -2011,7 +2011,7 @@ define <8 x i32> @test7(<8 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl
 ;
 ; AVX2-LABEL: 'test7'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl
 ;
 ; XOPAVX1-LABEL: 'test7'
@@ -2019,7 +2019,7 @@ define <8 x i32> @test7(<8 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl
 ;
 ; XOPAVX2-LABEL: 'test7'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl
 ;
 ; AVX512-LABEL: 'test7'
@@ -2049,7 +2049,7 @@ define <4 x i64> @test8(<4 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl
 ;
 ; AVX2-LABEL: 'test8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl
 ;
 ; XOPAVX1-LABEL: 'test8'
@@ -2057,7 +2057,7 @@ define <4 x i64> @test8(<4 x i64> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl
 ;
 ; XOPAVX2-LABEL: 'test8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl
 ;
 ; AVX512-LABEL: 'test8'
@@ -2135,7 +2135,7 @@ define <16 x i32> @test10(<16 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl
 ;
 ; AVX2-LABEL: 'test10'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl
 ;
 ; XOPAVX1-LABEL: 'test10'
@@ -2143,7 +2143,7 @@ define <16 x i32> @test10(<16 x i32> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl
 ;
 ; XOPAVX2-LABEL: 'test10'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl
 ;
 ; AVX512-LABEL: 'test10'
@@ -2173,7 +2173,7 @@ define <8 x i64> @test11(<8 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl
 ;
 ; AVX2-LABEL: 'test11'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl
 ;
 ; XOPAVX1-LABEL: 'test11'
@@ -2181,7 +2181,7 @@ define <8 x i64> @test11(<8 x i64> %a) {
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl
 ;
 ; XOPAVX2-LABEL: 'test11'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl
 ;
 ; AVX512-LABEL: 'test11'


        


More information about the llvm-commits mailing list