[llvm] 10e0f3e - [CostModel][X86] Add CostKinds handling for ctpop ops

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 6 09:27:43 PDT 2022


Author: Simon Pilgrim
Date: 2022-09-06T17:27:24+01:00
New Revision: 10e0f3e9481d1e88e55e033e2a608840d8545f3b

URL: https://github.com/llvm/llvm-project/commit/10e0f3e9481d1e88e55e033e2a608840d8545f3b
DIFF: https://github.com/llvm/llvm-project/commit/10e0f3e9481d1e88e55e033e2a608840d8545f3b.diff

LOG: [CostModel][X86] Add CostKinds handling for ctpop ops

This was achieved with an updated version of the 'cost-tables vs llvm-mca' script D103695 (although it still struggles with AVX512 predicate numbers, which had to be done manually).

Some of the pre-AVX values still aren't great - atom/slm worst case numbers for ctpop expansion really affect these (especially throughput/latency), so we need to clean them up in a more consistent way - it's a pity we don't have models for more of the older CPUs (merom/nehalem etc.) as other examples.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/ctpop-codesize.ll
    llvm/test/Analysis/CostModel/X86/ctpop-latency.ll
    llvm/test/Analysis/CostModel/X86/ctpop-sizelatency.ll
    llvm/test/Analysis/CostModel/X86/ctpop.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 2aaac9926c80..1d7b8af867b9 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3128,20 +3128,20 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   // TODO: Overflow intrinsics (*ADDO, *SUBO, *MULO) with vector types are not
   //       specialized in these tables yet.
   static const CostKindTblEntry AVX512BITALGCostTbl[] = {
-    { ISD::CTPOP,      MVT::v32i16,  {  1 } },
-    { ISD::CTPOP,      MVT::v64i8,   {  1 } },
-    { ISD::CTPOP,      MVT::v16i16,  {  1 } },
-    { ISD::CTPOP,      MVT::v32i8,   {  1 } },
-    { ISD::CTPOP,      MVT::v8i16,   {  1 } },
-    { ISD::CTPOP,      MVT::v16i8,   {  1 } },
+    { ISD::CTPOP,      MVT::v32i16,  {  1,  1,  1,  1 } },
+    { ISD::CTPOP,      MVT::v64i8,   {  1,  1,  1,  1 } },
+    { ISD::CTPOP,      MVT::v16i16,  {  1,  1,  1,  1 } },
+    { ISD::CTPOP,      MVT::v32i8,   {  1,  1,  1,  1 } },
+    { ISD::CTPOP,      MVT::v8i16,   {  1,  1,  1,  1 } },
+    { ISD::CTPOP,      MVT::v16i8,   {  1,  1,  1,  1 } },
   };
   static const CostKindTblEntry AVX512VPOPCNTDQCostTbl[] = {
-    { ISD::CTPOP,      MVT::v8i64,   {  1 } },
-    { ISD::CTPOP,      MVT::v16i32,  {  1 } },
-    { ISD::CTPOP,      MVT::v4i64,   {  1 } },
-    { ISD::CTPOP,      MVT::v8i32,   {  1 } },
-    { ISD::CTPOP,      MVT::v2i64,   {  1 } },
-    { ISD::CTPOP,      MVT::v4i32,   {  1 } },
+    { ISD::CTPOP,      MVT::v8i64,   {  1,  1,  1,  1 } },
+    { ISD::CTPOP,      MVT::v16i32,  {  1,  1,  1,  1 } },
+    { ISD::CTPOP,      MVT::v4i64,   {  1,  1,  1,  1 } },
+    { ISD::CTPOP,      MVT::v8i32,   {  1,  1,  1,  1 } },
+    { ISD::CTPOP,      MVT::v2i64,   {  1,  1,  1,  1 } },
+    { ISD::CTPOP,      MVT::v4i32,   {  1,  1,  1,  1 } },
   };
   static const CostKindTblEntry AVX512CDCostTbl[] = {
     { ISD::CTLZ,       MVT::v8i64,   {  1 } },
@@ -3171,10 +3171,18 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTLZ,       MVT::v16i32,  { 22 } },
     { ISD::CTLZ,       MVT::v32i16,  { 18 } },
     { ISD::CTLZ,       MVT::v64i8,   { 17 } },
-    { ISD::CTPOP,      MVT::v8i64,   {  7 } },
-    { ISD::CTPOP,      MVT::v16i32,  { 11 } },
-    { ISD::CTPOP,      MVT::v32i16,  {  9 } },
-    { ISD::CTPOP,      MVT::v64i8,   {  6 } },
+    { ISD::CTPOP,      MVT::v2i64,   {  3,  7, 10, 10 } },
+    { ISD::CTPOP,      MVT::v4i64,   {  3,  7, 10, 10 } },
+    { ISD::CTPOP,      MVT::v8i64,   {  3,  8, 10, 12 } },
+    { ISD::CTPOP,      MVT::v4i32,   {  7, 11, 14, 14 } },
+    { ISD::CTPOP,      MVT::v8i32,   {  7, 11, 14, 14 } },
+    { ISD::CTPOP,      MVT::v16i32,  {  7, 12, 14, 16 } },
+    { ISD::CTPOP,      MVT::v8i16,   {  2,  7, 11, 11 } },
+    { ISD::CTPOP,      MVT::v16i16,  {  2,  7, 11, 11 } },
+    { ISD::CTPOP,      MVT::v32i16,  {  3,  7, 11, 13 } },
+    { ISD::CTPOP,      MVT::v16i8,   {  2,  4,  8,  8 } },
+    { ISD::CTPOP,      MVT::v32i8,   {  2,  4,  8,  8 } },
+    { ISD::CTPOP,      MVT::v64i8,   {  2,  5,  8, 10 } },
     { ISD::CTTZ,       MVT::v8i64,   { 10 } },
     { ISD::CTTZ,       MVT::v16i32,  { 14 } },
     { ISD::CTTZ,       MVT::v32i16,  { 12 } },
@@ -3214,10 +3222,10 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTLZ,       MVT::v16i32,  { 35 } },
     { ISD::CTLZ,       MVT::v32i16,  { 28 } },
     { ISD::CTLZ,       MVT::v64i8,   { 18 } },
-    { ISD::CTPOP,      MVT::v8i64,   { 16 } },
-    { ISD::CTPOP,      MVT::v16i32,  { 24 } },
-    { ISD::CTPOP,      MVT::v32i16,  { 18 } },
-    { ISD::CTPOP,      MVT::v64i8,   { 12 } },
+    { ISD::CTPOP,      MVT::v8i64,   { 16, 16, 19, 19 } },
+    { ISD::CTPOP,      MVT::v16i32,  { 24, 19, 27, 27 } },
+    { ISD::CTPOP,      MVT::v32i16,  { 18, 15, 22, 22 } },
+    { ISD::CTPOP,      MVT::v64i8,   { 12, 11, 16, 16 } },
     { ISD::CTTZ,       MVT::v8i64,   { 20 } },
     { ISD::CTTZ,       MVT::v16i32,  { 28 } },
     { ISD::CTTZ,       MVT::v32i16,  { 24 } },
@@ -3317,14 +3325,14 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTLZ,       MVT::v16i16,  {  4 } },
     { ISD::CTLZ,       MVT::v16i8,   {  3 } },
     { ISD::CTLZ,       MVT::v32i8,   {  3 } },
-    { ISD::CTPOP,      MVT::v2i64,   {  3 } },
-    { ISD::CTPOP,      MVT::v4i64,   {  3 } },
-    { ISD::CTPOP,      MVT::v4i32,   {  7 } },
-    { ISD::CTPOP,      MVT::v8i32,   {  7 } },
-    { ISD::CTPOP,      MVT::v8i16,   {  3 } },
-    { ISD::CTPOP,      MVT::v16i16,  {  3 } },
-    { ISD::CTPOP,      MVT::v16i8,   {  2 } },
-    { ISD::CTPOP,      MVT::v32i8,   {  2 } },
+    { ISD::CTPOP,      MVT::v2i64,   {  3,  9, 10, 10 } },
+    { ISD::CTPOP,      MVT::v4i64,   {  4,  9, 10, 14 } },
+    { ISD::CTPOP,      MVT::v4i32,   {  7, 12, 14, 14 } },
+    { ISD::CTPOP,      MVT::v8i32,   {  7, 12, 14, 18 } },
+    { ISD::CTPOP,      MVT::v8i16,   {  3,  7, 11, 11 } },
+    { ISD::CTPOP,      MVT::v16i16,  {  6,  8, 11, 18 } },
+    { ISD::CTPOP,      MVT::v16i8,   {  2,  5,  8,  8 } },
+    { ISD::CTPOP,      MVT::v32i8,   {  3,  5,  8, 12 } },
     { ISD::CTTZ,       MVT::v2i64,   {  4 } },
     { ISD::CTTZ,       MVT::v4i64,   {  4 } },
     { ISD::CTTZ,       MVT::v4i32,   {  7 } },
@@ -3380,10 +3388,14 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTLZ,       MVT::v8i32,   { 38 } }, // 2 x 128-bit Op + extract/insert
     { ISD::CTLZ,       MVT::v16i16,  { 30 } }, // 2 x 128-bit Op + extract/insert
     { ISD::CTLZ,       MVT::v32i8,   { 20 } }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTPOP,      MVT::v4i64,   { 16 } }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTPOP,      MVT::v8i32,   { 24 } }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTPOP,      MVT::v16i16,  { 20 } }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTPOP,      MVT::v32i8,   { 14 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTPOP,      MVT::v4i64,   { 16, 19, 19, 28 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTPOP,      MVT::v2i64,   {  7,  9, 10, 14 } },
+    { ISD::CTPOP,      MVT::v8i32,   { 24, 27, 27, 36 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTPOP,      MVT::v4i32,   { 11, 12, 14, 18 } },
+    { ISD::CTPOP,      MVT::v16i16,  { 20, 23, 22, 31 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTPOP,      MVT::v8i16,   {  9, 11, 11, 15 } },
+    { ISD::CTPOP,      MVT::v32i8,   { 14, 17, 16, 25 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTPOP,      MVT::v16i8,   {  6,  7,  8, 12 } },
     { ISD::CTTZ,       MVT::v4i64,   { 22 } }, // 2 x 128-bit Op + extract/insert
     { ISD::CTTZ,       MVT::v8i32,   { 30 } }, // 2 x 128-bit Op + extract/insert
     { ISD::CTTZ,       MVT::v16i16,  { 26 } }, // 2 x 128-bit Op + extract/insert
@@ -3467,10 +3479,10 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTLZ,       MVT::v4i32,   { 18 } },
     { ISD::CTLZ,       MVT::v8i16,   { 14 } },
     { ISD::CTLZ,       MVT::v16i8,   {  9 } },
-    { ISD::CTPOP,      MVT::v2i64,   {  7 } },
-    { ISD::CTPOP,      MVT::v4i32,   { 11 } },
-    { ISD::CTPOP,      MVT::v8i16,   {  9 } },
-    { ISD::CTPOP,      MVT::v16i8,   {  6 } },
+    { ISD::CTPOP,      MVT::v2i64,   {  7, 19, 12, 18 } },
+    { ISD::CTPOP,      MVT::v4i32,   { 11, 24, 16, 22 } },
+    { ISD::CTPOP,      MVT::v8i16,   {  9, 18, 14, 20 } },
+    { ISD::CTPOP,      MVT::v16i8,   {  6, 12, 10, 16 } },
     { ISD::CTTZ,       MVT::v2i64,   { 10 } },
     { ISD::CTTZ,       MVT::v4i32,   { 14 } },
     { ISD::CTTZ,       MVT::v8i16,   { 12 } },
@@ -3492,10 +3504,10 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTLZ,       MVT::v4i32,   { 26 } },
     { ISD::CTLZ,       MVT::v8i16,   { 20 } },
     { ISD::CTLZ,       MVT::v16i8,   { 17 } },
-    { ISD::CTPOP,      MVT::v2i64,   { 12 } },
-    { ISD::CTPOP,      MVT::v4i32,   { 15 } },
-    { ISD::CTPOP,      MVT::v8i16,   { 13 } },
-    { ISD::CTPOP,      MVT::v16i8,   { 10 } },
+    { ISD::CTPOP,      MVT::v2i64,   { 12, 26, 16, 18 } },
+    { ISD::CTPOP,      MVT::v4i32,   { 15, 29, 21, 23 } },
+    { ISD::CTPOP,      MVT::v8i16,   { 13, 25, 18, 20 } },
+    { ISD::CTPOP,      MVT::v16i8,   { 10, 21, 14, 16 } },
     { ISD::CTTZ,       MVT::v2i64,   { 14 } },
     { ISD::CTTZ,       MVT::v4i32,   { 18 } },
     { ISD::CTTZ,       MVT::v8i16,   { 16 } },
@@ -3542,12 +3554,12 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTLZ,       MVT::i8,      {  1 } },
   };
   static const CostKindTblEntry POPCNT64CostTbl[] = { // 64-bit targets
-    { ISD::CTPOP,      MVT::i64,     {  1 } },
+    { ISD::CTPOP,      MVT::i64,     {  1, 1, 1, 1 } }, // popcnt
   };
   static const CostKindTblEntry POPCNT32CostTbl[] = { // 32 or 64-bit targets
-    { ISD::CTPOP,      MVT::i32,     {  1 } },
-    { ISD::CTPOP,      MVT::i16,     {  1 } },
-    { ISD::CTPOP,      MVT::i8,      {  1 } },
+    { ISD::CTPOP,      MVT::i32,     {  1, 1, 1, 1 } }, // popcnt
+    { ISD::CTPOP,      MVT::i16,     {  1, 1, 2, 2 } }, // popcnt(zext())
+    { ISD::CTPOP,      MVT::i8,      {  1, 1, 2, 2 } }, // popcnt(zext())
   };
   static const CostKindTblEntry X64CostTbl[] = { // 64-bit targets
     { ISD::ABS,        MVT::i64,     {  2 } }, // SUB+CMOV
@@ -3555,7 +3567,7 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::BSWAP,      MVT::i64,     {  1 } },
     { ISD::CTLZ,       MVT::i64,     {  4 } }, // BSR+XOR or BSR+XOR+CMOV
     { ISD::CTTZ,       MVT::i64,     {  3 } }, // TEST+BSF+CMOV/BRANCH
-    { ISD::CTPOP,      MVT::i64,     { 10 } },
+    { ISD::CTPOP,      MVT::i64,     { 10, 6, 19, 19 } },
     { ISD::SADDO,      MVT::i64,     {  1 } },
     { ISD::UADDO,      MVT::i64,     {  1 } },
     { ISD::UMULO,      MVT::i64,     {  2 } }, // mulq + seto
@@ -3574,9 +3586,9 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTTZ,       MVT::i32,     {  3 } }, // TEST+BSF+CMOV/BRANCH
     { ISD::CTTZ,       MVT::i16,     {  3 } }, // TEST+BSF+CMOV/BRANCH
     { ISD::CTTZ,       MVT::i8,      {  3 } }, // TEST+BSF+CMOV/BRANCH
-    { ISD::CTPOP,      MVT::i32,     {  8 } },
-    { ISD::CTPOP,      MVT::i16,     {  9 } },
-    { ISD::CTPOP,      MVT::i8,      {  7 } },
+    { ISD::CTPOP,      MVT::i32,     {  8,  7, 15, 15 } },
+    { ISD::CTPOP,      MVT::i16,     {  9,  8, 17, 17 } },
+    { ISD::CTPOP,      MVT::i8,      {  7,  6, 13, 13 } },
     { ISD::SADDO,      MVT::i32,     {  1 } },
     { ISD::SADDO,      MVT::i16,     {  1 } },
     { ISD::SADDO,      MVT::i8,      {  1 } },

diff  --git a/llvm/test/Analysis/CostModel/X86/ctpop-codesize.ll b/llvm/test/Analysis/CostModel/X86/ctpop-codesize.ll
index 84cd2f3bcdab..75adeee09197 100644
--- a/llvm/test/Analysis/CostModel/X86/ctpop-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/ctpop-codesize.ll
@@ -18,7 +18,7 @@ declare  i8 @llvm.ctpop.i8(i8)
 
 define i64 @var_ctpop_i64(i64 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i64'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i64'
@@ -31,7 +31,7 @@ define i64 @var_ctpop_i64(i64 %a) {
 
 define i32 @var_ctpop_i32(i32 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i32'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i32'
@@ -44,11 +44,11 @@ define i32 @var_ctpop_i32(i32 %a) {
 
 define i16 @var_ctpop_i16(i16 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i16'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i16'
-; POPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+; POPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
 ; POPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %ctpop
 ;
   %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
@@ -57,11 +57,11 @@ define i16 @var_ctpop_i16(i16 %a) {
 
 define i8 @var_ctpop_i8(i8 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i8'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i8'
-; POPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
+; POPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
 ; POPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %ctpop
 ;
   %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
@@ -87,27 +87,27 @@ declare <64 x i8> @llvm.ctpop.v64i8(<64 x i8>)
 
 define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
 ; SSE2-LABEL: 'var_ctpop_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v2i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v2i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v2i64'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v2i64'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v2i64'
@@ -115,7 +115,7 @@ define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v2i64'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
   %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
@@ -124,27 +124,27 @@ define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
 ; SSE2-LABEL: 'var_ctpop_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v4i64'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v4i64'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v4i64'
@@ -152,7 +152,7 @@ define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v4i64'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
   %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
@@ -161,27 +161,27 @@ define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 ; SSE2-LABEL: 'var_ctpop_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v8i64'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v8i64'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v8i64'
@@ -189,7 +189,7 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v8i64'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
   %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
@@ -198,27 +198,27 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
 ; SSE2-LABEL: 'var_ctpop_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v4i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v4i32'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v4i32'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v4i32'
@@ -226,7 +226,7 @@ define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v4i32'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
   %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
@@ -235,27 +235,27 @@ define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
 ; SSE2-LABEL: 'var_ctpop_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v8i32'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v8i32'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v8i32'
@@ -263,7 +263,7 @@ define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v8i32'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
   %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
@@ -272,27 +272,27 @@ define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
 ; SSE2-LABEL: 'var_ctpop_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v16i32'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v16i32'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v16i32'
@@ -300,7 +300,7 @@ define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v16i32'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
   %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
@@ -309,31 +309,31 @@ define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
 ; SSE2-LABEL: 'var_ctpop_v8i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v8i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v8i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v8i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v8i16'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v8i16'
@@ -346,31 +346,31 @@ define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
 ; SSE2-LABEL: 'var_ctpop_v16i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v16i16'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v16i16'
@@ -383,31 +383,31 @@ define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
 ; SSE2-LABEL: 'var_ctpop_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v32i16'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v32i16'
@@ -420,31 +420,31 @@ define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
 ; SSE2-LABEL: 'var_ctpop_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v16i8'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v16i8'
@@ -457,31 +457,31 @@ define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
 ; SSE2-LABEL: 'var_ctpop_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v32i8'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v32i8'
@@ -494,31 +494,31 @@ define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @var_ctpop_v64i8(<64 x i8> %a) {
 ; SSE2-LABEL: 'var_ctpop_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v64i8'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v64i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/ctpop-latency.ll b/llvm/test/Analysis/CostModel/X86/ctpop-latency.ll
index d1ef37c584e8..818984786563 100644
--- a/llvm/test/Analysis/CostModel/X86/ctpop-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/ctpop-latency.ll
@@ -18,7 +18,7 @@ declare  i8 @llvm.ctpop.i8(i8)
 
 define i64 @var_ctpop_i64(i64 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i64'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i64'
@@ -31,7 +31,7 @@ define i64 @var_ctpop_i64(i64 %a) {
 
 define i32 @var_ctpop_i32(i32 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i32'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i32'
@@ -44,7 +44,7 @@ define i32 @var_ctpop_i32(i32 %a) {
 
 define i16 @var_ctpop_i16(i16 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i16'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i16'
@@ -57,7 +57,7 @@ define i16 @var_ctpop_i16(i16 %a) {
 
 define i8 @var_ctpop_i8(i8 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i8'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i8'
@@ -87,27 +87,27 @@ declare <64 x i8> @llvm.ctpop.v64i8(<64 x i8>)
 
 define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
 ; SSE2-LABEL: 'var_ctpop_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v2i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v2i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v2i64'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v2i64'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v2i64'
@@ -115,7 +115,7 @@ define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v2i64'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
   %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
@@ -124,27 +124,27 @@ define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
 ; SSE2-LABEL: 'var_ctpop_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v4i64'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v4i64'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v4i64'
@@ -152,7 +152,7 @@ define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v4i64'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
   %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
@@ -161,27 +161,27 @@ define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 ; SSE2-LABEL: 'var_ctpop_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v8i64'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v8i64'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v8i64'
@@ -189,7 +189,7 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v8i64'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
   %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
@@ -198,27 +198,27 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
 ; SSE2-LABEL: 'var_ctpop_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v4i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v4i32'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v4i32'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v4i32'
@@ -226,7 +226,7 @@ define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v4i32'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
   %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
@@ -235,27 +235,27 @@ define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
 ; SSE2-LABEL: 'var_ctpop_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v8i32'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v8i32'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v8i32'
@@ -263,7 +263,7 @@ define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v8i32'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
   %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
@@ -272,27 +272,27 @@ define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
 ; SSE2-LABEL: 'var_ctpop_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v16i32'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v16i32'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v16i32'
@@ -300,7 +300,7 @@ define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v16i32'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
   %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
@@ -309,31 +309,31 @@ define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
 ; SSE2-LABEL: 'var_ctpop_v8i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v8i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v8i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v8i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v8i16'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v8i16'
@@ -346,31 +346,31 @@ define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
 ; SSE2-LABEL: 'var_ctpop_v16i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v16i16'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v16i16'
@@ -383,31 +383,31 @@ define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
 ; SSE2-LABEL: 'var_ctpop_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v32i16'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v32i16'
@@ -420,31 +420,31 @@ define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
 ; SSE2-LABEL: 'var_ctpop_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v16i8'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v16i8'
@@ -457,31 +457,31 @@ define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
 ; SSE2-LABEL: 'var_ctpop_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v32i8'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v32i8'
@@ -494,31 +494,31 @@ define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @var_ctpop_v64i8(<64 x i8> %a) {
 ; SSE2-LABEL: 'var_ctpop_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v64i8'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v64i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/ctpop-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/ctpop-sizelatency.ll
index 9949907748cd..b46cfe15569e 100644
--- a/llvm/test/Analysis/CostModel/X86/ctpop-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/ctpop-sizelatency.ll
@@ -18,7 +18,7 @@ declare  i8 @llvm.ctpop.i8(i8)
 
 define i64 @var_ctpop_i64(i64 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i64'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i64'
@@ -31,7 +31,7 @@ define i64 @var_ctpop_i64(i64 %a) {
 
 define i32 @var_ctpop_i32(i32 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i32'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i32'
@@ -44,11 +44,11 @@ define i32 @var_ctpop_i32(i32 %a) {
 
 define i16 @var_ctpop_i16(i16 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i16'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i16'
-; POPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+; POPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
 ; POPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %ctpop
 ;
   %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
@@ -57,11 +57,11 @@ define i16 @var_ctpop_i16(i16 %a) {
 
 define i8 @var_ctpop_i8(i8 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i8'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i8'
-; POPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
+; POPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
 ; POPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %ctpop
 ;
   %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
@@ -87,27 +87,27 @@ declare <64 x i8> @llvm.ctpop.v64i8(<64 x i8>)
 
 define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
 ; SSE2-LABEL: 'var_ctpop_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v2i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v2i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v2i64'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v2i64'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v2i64'
@@ -115,7 +115,7 @@ define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v2i64'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
   %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
@@ -124,27 +124,27 @@ define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
 ; SSE2-LABEL: 'var_ctpop_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v4i64'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v4i64'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v4i64'
@@ -152,7 +152,7 @@ define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v4i64'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
   %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
@@ -161,27 +161,27 @@ define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 ; SSE2-LABEL: 'var_ctpop_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v8i64'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v8i64'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v8i64'
@@ -189,7 +189,7 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v8i64'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
   %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
@@ -198,27 +198,27 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
 ; SSE2-LABEL: 'var_ctpop_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v4i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v4i32'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v4i32'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v4i32'
@@ -226,7 +226,7 @@ define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v4i32'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
   %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
@@ -235,27 +235,27 @@ define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
 ; SSE2-LABEL: 'var_ctpop_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v8i32'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v8i32'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v8i32'
@@ -263,7 +263,7 @@ define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v8i32'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
   %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
@@ -272,27 +272,27 @@ define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
 ; SSE2-LABEL: 'var_ctpop_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v16i32'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v16i32'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v16i32'
@@ -300,7 +300,7 @@ define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v16i32'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
   %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
@@ -309,31 +309,31 @@ define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
 ; SSE2-LABEL: 'var_ctpop_v8i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v8i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v8i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v8i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v8i16'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v8i16'
@@ -346,31 +346,31 @@ define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
 ; SSE2-LABEL: 'var_ctpop_v16i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v16i16'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v16i16'
@@ -383,31 +383,31 @@ define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
 ; SSE2-LABEL: 'var_ctpop_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v32i16'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v32i16'
@@ -420,31 +420,31 @@ define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
 ; SSE2-LABEL: 'var_ctpop_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v16i8'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v16i8'
@@ -457,31 +457,31 @@ define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
 ; SSE2-LABEL: 'var_ctpop_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v32i8'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v32i8'
@@ -494,31 +494,31 @@ define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @var_ctpop_v64i8(<64 x i8> %a) {
 ; SSE2-LABEL: 'var_ctpop_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v64i8'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v64i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/ctpop.ll b/llvm/test/Analysis/CostModel/X86/ctpop.ll
index 5d7a7e650243..daf0566789bd 100644
--- a/llvm/test/Analysis/CostModel/X86/ctpop.ll
+++ b/llvm/test/Analysis/CostModel/X86/ctpop.ll
@@ -136,11 +136,11 @@ define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v4i64'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v4i64'
@@ -173,7 +173,7 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v8i64'
@@ -181,7 +181,7 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v8i64'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v8i64'
@@ -189,7 +189,7 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v8i64'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
 ;
   %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
@@ -292,7 +292,7 @@ define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v16i32'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v16i32'
@@ -300,7 +300,7 @@ define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v16i32'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctpop
 ;
   %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
@@ -329,7 +329,7 @@ define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v8i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v8i16'
@@ -358,19 +358,19 @@ define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v16i16'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v16i16'
@@ -395,7 +395,7 @@ define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v32i16'
@@ -403,7 +403,7 @@ define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v32i16'
@@ -469,11 +469,11 @@ define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v32i8'
@@ -481,7 +481,7 @@ define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v32i8'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX512BITALG-LABEL: 'var_ctpop_v32i8'
@@ -506,7 +506,7 @@ define <64 x i8> @var_ctpop_v64i8(<64 x i8> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512F-LABEL: 'var_ctpop_v64i8'
@@ -514,7 +514,7 @@ define <64 x i8> @var_ctpop_v64i8(<64 x i8> %a) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512BW-LABEL: 'var_ctpop_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX512VPOPCNT-LABEL: 'var_ctpop_v64i8'


        


More information about the llvm-commits mailing list