[llvm] 2538add - [CostModel][X86] Add CostKinds handling for cttz

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 19 07:57:17 PDT 2022


Author: Simon Pilgrim
Date: 2022-09-19T15:57:03+01:00
New Revision: 2538adde5c89d1786973e4b30b574af3f228bc74

URL: https://github.com/llvm/llvm-project/commit/2538adde5c89d1786973e4b30b574af3f228bc74
DIFF: https://github.com/llvm/llvm-project/commit/2538adde5c89d1786973e4b30b574af3f228bc74.diff

LOG: [CostModel][X86] Add CostKinds handling for cttz

The updated cost values were obtained with the 'cost-tables vs llvm-mca' script (see D103695).

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
    llvm/test/Analysis/CostModel/X86/cttz-latency.ll
    llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
    llvm/test/Analysis/CostModel/X86/cttz.ll
    llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
    llvm/test/Analysis/CostModel/X86/scalarize.ll
    llvm/test/Transforms/SLPVectorizer/X86/cttz.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 0679d10c1af0..7450cfa628f8 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3342,6 +3342,13 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTLZ,       MVT::v4i32,   {  1 } },
     { ISD::CTLZ,       MVT::v8i16,   {  3 } },
     { ISD::CTLZ,       MVT::v16i8,   {  2 } },
+
+    { ISD::CTTZ,       MVT::v8i64,   {  2,  8,  6,  7 } },
+    { ISD::CTTZ,       MVT::v16i32,  {  2,  8,  6,  7 } },
+    { ISD::CTTZ,       MVT::v4i64,   {  1,  8,  6,  6 } },
+    { ISD::CTTZ,       MVT::v8i32,   {  1,  8,  6,  6 } },
+    { ISD::CTTZ,       MVT::v2i64,   {  1,  8,  6,  6 } },
+    { ISD::CTTZ,       MVT::v4i32,   {  1,  8,  6,  6 } },
   };
   static const CostKindTblEntry AVX512BWCostTbl[] = {
     { ISD::ABS,        MVT::v32i16,  {  1,  1,  1,  1 } },
@@ -3369,10 +3376,12 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTPOP,      MVT::v16i8,   {  2,  4,  8,  8 } },
     { ISD::CTPOP,      MVT::v32i8,   {  2,  4,  8,  8 } },
     { ISD::CTPOP,      MVT::v64i8,   {  2,  5,  8, 10 } },
-    { ISD::CTTZ,       MVT::v8i64,   { 10 } },
-    { ISD::CTTZ,       MVT::v16i32,  { 14 } },
-    { ISD::CTTZ,       MVT::v32i16,  { 12 } },
-    { ISD::CTTZ,       MVT::v64i8,   {  9 } },
+    { ISD::CTTZ,       MVT::v8i16,   {  3,  9, 14, 14 } },
+    { ISD::CTTZ,       MVT::v16i16,  {  3,  9, 14, 14 } },
+    { ISD::CTTZ,       MVT::v32i16,  {  3, 10, 14, 16 } },
+    { ISD::CTTZ,       MVT::v16i8,   {  2,  6, 11, 11 } },
+    { ISD::CTTZ,       MVT::v32i8,   {  2,  6, 11, 11 } },
+    { ISD::CTTZ,       MVT::v64i8,   {  3,  7, 11, 13 } },
     { ISD::ROTL,       MVT::v32i16,  {  2 } },
     { ISD::ROTL,       MVT::v16i16,  {  2 } },
     { ISD::ROTL,       MVT::v8i16,   {  2 } },
@@ -3427,10 +3436,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTPOP,      MVT::v16i32,  { 24, 19, 27, 27 } },
     { ISD::CTPOP,      MVT::v32i16,  { 18, 15, 22, 22 } },
     { ISD::CTPOP,      MVT::v64i8,   { 12, 11, 16, 16 } },
-    { ISD::CTTZ,       MVT::v8i64,   { 20 } },
-    { ISD::CTTZ,       MVT::v16i32,  { 28 } },
-    { ISD::CTTZ,       MVT::v32i16,  { 24 } },
-    { ISD::CTTZ,       MVT::v64i8,   { 18 } },
+    { ISD::CTTZ,       MVT::v8i64,   {  2,  8,  6,  7 } },
+    { ISD::CTTZ,       MVT::v16i32,  {  2,  8,  6,  7 } },
+    { ISD::CTTZ,       MVT::v32i16,  {  7, 17, 27, 27 } },
+    { ISD::CTTZ,       MVT::v64i8,   {  6, 13, 21, 21 } },
     { ISD::ROTL,       MVT::v8i64,   {  1 } },
     { ISD::ROTL,       MVT::v4i64,   {  1 } },
     { ISD::ROTL,       MVT::v2i64,   {  1 } },
@@ -3567,14 +3576,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTPOP,      MVT::v16i16,  {  6,  8, 11, 18 } },
     { ISD::CTPOP,      MVT::v16i8,   {  2,  5,  8,  8 } },
     { ISD::CTPOP,      MVT::v32i8,   {  3,  5,  8, 12 } },
-    { ISD::CTTZ,       MVT::v2i64,   {  4 } },
-    { ISD::CTTZ,       MVT::v4i64,   {  4 } },
-    { ISD::CTTZ,       MVT::v4i32,   {  7 } },
-    { ISD::CTTZ,       MVT::v8i32,   {  7 } },
-    { ISD::CTTZ,       MVT::v8i16,   {  4 } },
-    { ISD::CTTZ,       MVT::v16i16,  {  4 } },
-    { ISD::CTTZ,       MVT::v16i8,   {  3 } },
-    { ISD::CTTZ,       MVT::v32i8,   {  3 } },
+    { ISD::CTTZ,       MVT::v2i64,   {  4, 11, 13, 13 } },
+    { ISD::CTTZ,       MVT::v4i64,   {  5, 11, 13, 20 } },
+    { ISD::CTTZ,       MVT::v4i32,   {  7, 14, 17, 17 } },
+    { ISD::CTTZ,       MVT::v8i32,   {  7, 15, 17, 24 } },
+    { ISD::CTTZ,       MVT::v8i16,   {  4,  9, 14, 14 } },
+    { ISD::CTTZ,       MVT::v16i16,  {  6,  9, 14, 24 } },
+    { ISD::CTTZ,       MVT::v16i8,   {  3,  7, 11, 11 } },
+    { ISD::CTTZ,       MVT::v32i8,   {  5,  7, 11, 18 } },
     { ISD::SADDSAT,    MVT::v16i16,  {  1 } },
     { ISD::SADDSAT,    MVT::v32i8,   {  1 } },
     { ISD::SMAX,       MVT::v8i32,   {  1 } },
@@ -3634,10 +3643,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTPOP,      MVT::v8i16,   {  8, 18, 11, 15 } },
     { ISD::CTPOP,      MVT::v32i8,   { 13, 15, 16, 25 } }, // 2 x 128-bit Op + extract/insert
     { ISD::CTPOP,      MVT::v16i8,   {  6, 12,  8, 12 } },
-    { ISD::CTTZ,       MVT::v4i64,   { 22 } }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTTZ,       MVT::v8i32,   { 30 } }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTTZ,       MVT::v16i16,  { 26 } }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTTZ,       MVT::v32i8,   { 20 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTTZ,       MVT::v4i64,   { 17, 22, 24, 33 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTTZ,       MVT::v2i64,   {  9, 19, 13, 17 } },
+    { ISD::CTTZ,       MVT::v8i32,   { 21, 27, 32, 41 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTTZ,       MVT::v4i32,   { 11, 24, 17, 21 } },
+    { ISD::CTTZ,       MVT::v16i16,  { 18, 24, 27, 36 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTTZ,       MVT::v8i16,   {  9, 21, 14, 18 } },
+    { ISD::CTTZ,       MVT::v32i8,   { 15, 18, 21, 30 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTTZ,       MVT::v16i8,   {  8, 16, 11, 15 } },
     { ISD::SADDSAT,    MVT::v16i16,  {  4 } }, // 2 x 128-bit Op + extract/insert
     { ISD::SADDSAT,    MVT::v32i8,   {  4 } }, // 2 x 128-bit Op + extract/insert
     { ISD::SMAX,       MVT::v8i32,   {  4 } }, // 2 x 128-bit Op + extract/insert
@@ -3721,10 +3734,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTPOP,      MVT::v4i32,   { 18, 24, 16, 22 } },
     { ISD::CTPOP,      MVT::v8i16,   { 13, 18, 14, 20 } },
     { ISD::CTPOP,      MVT::v16i8,   { 11, 12, 10, 16 } },
-    { ISD::CTTZ,       MVT::v2i64,   { 10 } },
-    { ISD::CTTZ,       MVT::v4i32,   { 14 } },
-    { ISD::CTTZ,       MVT::v8i16,   { 12 } },
-    { ISD::CTTZ,       MVT::v16i8,   {  9 } }
+    { ISD::CTTZ,       MVT::v2i64,   { 13, 25, 15, 22 } },
+    { ISD::CTTZ,       MVT::v4i32,   { 18, 26, 19, 25 } },
+    { ISD::CTTZ,       MVT::v8i16,   { 13, 20, 17, 23 } },
+    { ISD::CTTZ,       MVT::v16i8,   { 11, 16, 13, 19 } }
   };
   static const CostKindTblEntry SSE2CostTbl[] = {
     { ISD::ABS,        MVT::v2i64,   {  3,  6,  5,  5 } },
@@ -3746,10 +3759,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTPOP,      MVT::v4i32,   { 15, 29, 21, 23 } },
     { ISD::CTPOP,      MVT::v8i16,   { 13, 25, 18, 20 } },
     { ISD::CTPOP,      MVT::v16i8,   { 10, 21, 14, 16 } },
-    { ISD::CTTZ,       MVT::v2i64,   { 14 } },
-    { ISD::CTTZ,       MVT::v4i32,   { 18 } },
-    { ISD::CTTZ,       MVT::v8i16,   { 16 } },
-    { ISD::CTTZ,       MVT::v16i8,   { 13 } },
+    { ISD::CTTZ,       MVT::v2i64,   { 14, 28, 19, 21 } },
+    { ISD::CTTZ,       MVT::v4i32,   { 18, 31, 24, 26 } },
+    { ISD::CTTZ,       MVT::v8i16,   { 16, 27, 21, 23 } },
+    { ISD::CTTZ,       MVT::v16i8,   { 13, 23, 17, 19 } },
     { ISD::SADDSAT,    MVT::v8i16,   {  1 } },
     { ISD::SADDSAT,    MVT::v16i8,   {  1 } },
     { ISD::SMAX,       MVT::v8i16,   {  1 } },

diff  --git a/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll b/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
index e83dcae0d939..c4b58a4b4f69 100644
--- a/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
@@ -124,27 +124,27 @@ declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1)
 
 define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
 ; NOBMI-LABEL: 'var_cttz_v2i64'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v2i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v2i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
   %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
@@ -153,27 +153,27 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
 
 define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
 ; NOBMI-LABEL: 'var_cttz_v2i64u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v2i64u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v2i64u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v2i64u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v2i64u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v2i64u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
   %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
@@ -182,27 +182,27 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
 
 define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
 ; NOBMI-LABEL: 'var_cttz_v4i64'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
   %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
@@ -211,27 +211,27 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
 
 define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
 ; NOBMI-LABEL: 'var_cttz_v4i64u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v4i64u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i64u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i64u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i64u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i64u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
   %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
@@ -240,27 +240,27 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
 
 define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
 ; NOBMI-LABEL: 'var_cttz_v8i64'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
   %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0)
@@ -269,27 +269,27 @@ define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
 
 define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
 ; NOBMI-LABEL: 'var_cttz_v8i64u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v8i64u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i64u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i64u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i64u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v8i64u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
   %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1)
@@ -298,27 +298,27 @@ define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
 
 define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
 ; NOBMI-LABEL: 'var_cttz_v4i32'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
   %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
@@ -327,27 +327,27 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
 
 define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
 ; NOBMI-LABEL: 'var_cttz_v4i32u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v4i32u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i32u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i32u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i32u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i32u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
   %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
@@ -356,27 +356,27 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
 
 define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
 ; NOBMI-LABEL: 'var_cttz_v8i32'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
   %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
@@ -385,27 +385,27 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
 
 define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
 ; NOBMI-LABEL: 'var_cttz_v8i32u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v8i32u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i32u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i32u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i32u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v8i32u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
   %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
@@ -414,27 +414,27 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
 
 define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
 ; NOBMI-LABEL: 'var_cttz_v16i32'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
   %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0)
@@ -443,27 +443,27 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
 
 define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
 ; NOBMI-LABEL: 'var_cttz_v16i32u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v16i32u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i32u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i32u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i32u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i32u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
   %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1)
@@ -471,18 +471,58 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
 }
 
 define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
-; CHECK-LABEL: 'var_cttz_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+; NOBMI-LABEL: 'var_cttz_v8i16'
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE2-LABEL: 'var_cttz_v8i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE42-LABEL: 'var_cttz_v8i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX1-LABEL: 'var_cttz_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX2-LABEL: 'var_cttz_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX512-LABEL: 'var_cttz_v8i16'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
 ;
   %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
   ret <8 x i16> %cttz
 }
 
 define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
-; CHECK-LABEL: 'var_cttz_v8i16u'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+; NOBMI-LABEL: 'var_cttz_v8i16u'
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE2-LABEL: 'var_cttz_v8i16u'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE42-LABEL: 'var_cttz_v8i16u'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX1-LABEL: 'var_cttz_v8i16u'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX2-LABEL: 'var_cttz_v8i16u'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX512-LABEL: 'var_cttz_v8i16u'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
 ;
   %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
   ret <8 x i16> %cttz
@@ -490,27 +530,27 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
 
 define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
 ; NOBMI-LABEL: 'var_cttz_v16i16'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v16i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
   %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
@@ -519,27 +559,27 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
 
 define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
 ; NOBMI-LABEL: 'var_cttz_v16i16u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v16i16u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i16u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i16u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i16u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i16u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
   %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
@@ -548,28 +588,24 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
 
 define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
 ; NOBMI-LABEL: 'var_cttz_v32i16'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
   %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0)
   ret <32 x i16> %cttz
@@ -577,28 +613,24 @@ define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
 
 define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
 ; NOBMI-LABEL: 'var_cttz_v32i16u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v32i16u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i16u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i16u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i16u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i16u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
   %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1)
   ret <32 x i16> %cttz
@@ -606,27 +638,27 @@ define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
 
 define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
 ; NOBMI-LABEL: 'var_cttz_v16i8'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
   %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
@@ -635,27 +667,27 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
 
 define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
 ; NOBMI-LABEL: 'var_cttz_v16i8u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v16i8u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i8u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i8u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i8u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i8u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
   %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
@@ -664,27 +696,27 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
 
 define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
 ; NOBMI-LABEL: 'var_cttz_v32i8'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
   %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
@@ -693,27 +725,27 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
 
 define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
 ; NOBMI-LABEL: 'var_cttz_v32i8u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v32i8u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i8u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i8u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i8u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v32i8u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
   %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
@@ -722,28 +754,24 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
 
 define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
 ; NOBMI-LABEL: 'var_cttz_v64i8'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v64i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
   %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0)
   ret <64 x i8> %cttz
@@ -751,28 +779,24 @@ define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
 
 define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) {
 ; NOBMI-LABEL: 'var_cttz_v64i8u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v64i8u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v64i8u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v64i8u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v64i8u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v64i8u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
   %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1)
   ret <64 x i8> %cttz

diff  --git a/llvm/test/Analysis/CostModel/X86/cttz-latency.ll b/llvm/test/Analysis/CostModel/X86/cttz-latency.ll
index 06ec07a2eb9b..ca024827c141 100644
--- a/llvm/test/Analysis/CostModel/X86/cttz-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/cttz-latency.ll
@@ -123,24 +123,24 @@ declare <32 x i16> @llvm.cttz.v32i16(<32 x i16>, i1)
 declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1)
 
 define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
-; NOBMI-LABEL: 'var_cttz_v2i64'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
+; SSE2-LABEL: 'var_cttz_v2i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v2i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v2i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
   %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
@@ -148,24 +148,24 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
 }
 
 define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
-; NOBMI-LABEL: 'var_cttz_v2i64u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
+; SSE2-LABEL: 'var_cttz_v2i64u'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v2i64u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v2i64u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v2i64u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v2i64u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
   %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
@@ -173,24 +173,24 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
 }
 
 define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
-; NOBMI-LABEL: 'var_cttz_v4i64'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
+; SSE2-LABEL: 'var_cttz_v4i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
   %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
@@ -198,24 +198,24 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
 }
 
 define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
-; NOBMI-LABEL: 'var_cttz_v4i64u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
+; SSE2-LABEL: 'var_cttz_v4i64u'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i64u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i64u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i64u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i64u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
   %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
@@ -223,24 +223,24 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
 }
 
 define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
-; NOBMI-LABEL: 'var_cttz_v8i64'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
+; SSE2-LABEL: 'var_cttz_v8i64'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
   %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0)
@@ -248,24 +248,24 @@ define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
 }
 
 define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
-; NOBMI-LABEL: 'var_cttz_v8i64u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
+; SSE2-LABEL: 'var_cttz_v8i64u'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i64u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i64u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i64u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v8i64u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
   %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1)
@@ -273,24 +273,24 @@ define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
 }
 
 define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
-; NOBMI-LABEL: 'var_cttz_v4i32'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
+; SSE2-LABEL: 'var_cttz_v4i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
   %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
@@ -298,24 +298,24 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
 }
 
 define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
-; NOBMI-LABEL: 'var_cttz_v4i32u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
+; SSE2-LABEL: 'var_cttz_v4i32u'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i32u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i32u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i32u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i32u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
   %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
@@ -323,12 +323,12 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
 }
 
 define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
-; NOBMI-LABEL: 'var_cttz_v8i32'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
+; SSE2-LABEL: 'var_cttz_v8i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i32'
@@ -336,11 +336,11 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
   %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
@@ -348,12 +348,12 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
 }
 
 define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
-; NOBMI-LABEL: 'var_cttz_v8i32u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
+; SSE2-LABEL: 'var_cttz_v8i32u'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i32u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i32u'
@@ -361,11 +361,11 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i32u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v8i32u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
   %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
@@ -373,12 +373,12 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
 }
 
 define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
-; NOBMI-LABEL: 'var_cttz_v16i32'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
+; SSE2-LABEL: 'var_cttz_v16i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i32'
@@ -386,11 +386,11 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
   %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0)
@@ -398,12 +398,12 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
 }
 
 define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
-; NOBMI-LABEL: 'var_cttz_v16i32u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
+; SSE2-LABEL: 'var_cttz_v16i32u'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i32u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i32u'
@@ -411,11 +411,11 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i32u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i32u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
   %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1)
@@ -423,18 +423,50 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
 }
 
 define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
-; CHECK-LABEL: 'var_cttz_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+; SSE2-LABEL: 'var_cttz_v8i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE42-LABEL: 'var_cttz_v8i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX1-LABEL: 'var_cttz_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX2-LABEL: 'var_cttz_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX512-LABEL: 'var_cttz_v8i16'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
 ;
   %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
   ret <8 x i16> %cttz
 }
 
 define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
-; CHECK-LABEL: 'var_cttz_v8i16u'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+; SSE2-LABEL: 'var_cttz_v8i16u'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE42-LABEL: 'var_cttz_v8i16u'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX1-LABEL: 'var_cttz_v8i16u'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX2-LABEL: 'var_cttz_v8i16u'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX512-LABEL: 'var_cttz_v8i16u'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
 ;
   %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
   ret <8 x i16> %cttz
@@ -442,23 +474,23 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
 
 define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
 ; SSE2-LABEL: 'var_cttz_v16i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
   %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
@@ -467,23 +499,23 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
 
 define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
 ; SSE2-LABEL: 'var_cttz_v16i16u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i16u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i16u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i16u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i16u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
   %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
@@ -492,24 +524,20 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
 
 define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
 ; SSE2-LABEL: 'var_cttz_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
   %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0)
   ret <32 x i16> %cttz
@@ -517,24 +545,20 @@ define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
 
 define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
 ; SSE2-LABEL: 'var_cttz_v32i16u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i16u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i16u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i16u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i16u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
   %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1)
   ret <32 x i16> %cttz
@@ -542,24 +566,20 @@ define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
 
 define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
 ; SSE2-LABEL: 'var_cttz_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
   %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
   ret <16 x i8> %cttz
@@ -567,24 +587,20 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
 
 define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
 ; SSE2-LABEL: 'var_cttz_v16i8u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i8u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i8u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i8u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v16i8u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
   %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
   ret <16 x i8> %cttz
@@ -592,24 +608,20 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
 
 define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
 ; SSE2-LABEL: 'var_cttz_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
   %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
   ret <32 x i8> %cttz
@@ -617,24 +629,20 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
 
 define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
 ; SSE2-LABEL: 'var_cttz_v32i8u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i8u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i8u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i8u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i8u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
   %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
   ret <32 x i8> %cttz
@@ -642,24 +650,20 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
 
 define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
 ; SSE2-LABEL: 'var_cttz_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v64i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
   %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0)
   ret <64 x i8> %cttz
@@ -667,24 +671,20 @@ define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
 
 define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) {
 ; SSE2-LABEL: 'var_cttz_v64i8u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v64i8u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v64i8u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v64i8u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v64i8u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
   %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1)
   ret <64 x i8> %cttz

diff  --git a/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
index 160bd45f53d0..f6f77c56c5c8 100644
--- a/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
@@ -124,27 +124,27 @@ declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1)
 
 define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
 ; NOBMI-LABEL: 'var_cttz_v2i64'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v2i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v2i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
   %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
@@ -153,27 +153,27 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
 
 define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
 ; NOBMI-LABEL: 'var_cttz_v2i64u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v2i64u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v2i64u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v2i64u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v2i64u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v2i64u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
 ;
   %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
@@ -182,27 +182,27 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
 
 define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
 ; NOBMI-LABEL: 'var_cttz_v4i64'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
   %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
@@ -211,27 +211,27 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
 
 define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
 ; NOBMI-LABEL: 'var_cttz_v4i64u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v4i64u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i64u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i64u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i64u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i64u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
 ;
   %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
@@ -240,27 +240,27 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
 
 define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
 ; NOBMI-LABEL: 'var_cttz_v8i64'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
   %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0)
@@ -269,27 +269,27 @@ define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
 
 define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
 ; NOBMI-LABEL: 'var_cttz_v8i64u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v8i64u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i64u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i64u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i64u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v8i64u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
 ;
   %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1)
@@ -298,27 +298,27 @@ define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
 
 define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
 ; NOBMI-LABEL: 'var_cttz_v4i32'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
   %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
@@ -327,27 +327,27 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
 
 define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
 ; NOBMI-LABEL: 'var_cttz_v4i32u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v4i32u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i32u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i32u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i32u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i32u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
 ;
   %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
@@ -356,27 +356,27 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
 
 define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
 ; NOBMI-LABEL: 'var_cttz_v8i32'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
   %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
@@ -385,27 +385,27 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
 
 define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
 ; NOBMI-LABEL: 'var_cttz_v8i32u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v8i32u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i32u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i32u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i32u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v8i32u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
 ;
   %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
@@ -414,27 +414,27 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
 
 define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
 ; NOBMI-LABEL: 'var_cttz_v16i32'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
   %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0)
@@ -443,27 +443,27 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
 
 define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
 ; NOBMI-LABEL: 'var_cttz_v16i32u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v16i32u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i32u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i32u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i32u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i32u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
 ;
   %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1)
@@ -471,18 +471,58 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
 }
 
 define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
-; CHECK-LABEL: 'var_cttz_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+; NOBMI-LABEL: 'var_cttz_v8i16'
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE2-LABEL: 'var_cttz_v8i16'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE42-LABEL: 'var_cttz_v8i16'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX1-LABEL: 'var_cttz_v8i16'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX2-LABEL: 'var_cttz_v8i16'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX512-LABEL: 'var_cttz_v8i16'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
 ;
   %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
   ret <8 x i16> %cttz
 }
 
 define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
-; CHECK-LABEL: 'var_cttz_v8i16u'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+; NOBMI-LABEL: 'var_cttz_v8i16u'
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE2-LABEL: 'var_cttz_v8i16u'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE42-LABEL: 'var_cttz_v8i16u'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX1-LABEL: 'var_cttz_v8i16u'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX2-LABEL: 'var_cttz_v8i16u'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX512-LABEL: 'var_cttz_v8i16u'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
 ;
   %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
   ret <8 x i16> %cttz
@@ -490,28 +530,24 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
 
 define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
 ; NOBMI-LABEL: 'var_cttz_v16i16'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v16i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v16i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
   %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
   ret <16 x i16> %cttz
@@ -519,28 +555,24 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
 
 define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
 ; NOBMI-LABEL: 'var_cttz_v16i16u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v16i16u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i16u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i16u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i16u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v16i16u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
 ;
   %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
   ret <16 x i16> %cttz
@@ -548,28 +580,24 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
 
 define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
 ; NOBMI-LABEL: 'var_cttz_v32i16'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
   %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0)
   ret <32 x i16> %cttz
@@ -577,28 +605,24 @@ define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
 
 define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
 ; NOBMI-LABEL: 'var_cttz_v32i16u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v32i16u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i16u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i16u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i16u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i16u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
 ;
   %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1)
   ret <32 x i16> %cttz
@@ -606,27 +630,27 @@ define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
 
 define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
 ; NOBMI-LABEL: 'var_cttz_v16i8'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
   %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
@@ -635,27 +659,27 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
 
 define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
 ; NOBMI-LABEL: 'var_cttz_v16i8u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v16i8u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i8u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i8u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i8u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v16i8u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
 ;
   %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
@@ -664,28 +688,24 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
 
 define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
 ; NOBMI-LABEL: 'var_cttz_v32i8'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
   %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
   ret <32 x i8> %cttz
@@ -693,28 +713,24 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
 
 define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
 ; NOBMI-LABEL: 'var_cttz_v32i8u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v32i8u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i8u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i8u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i8u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i8u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
 ;
   %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
   ret <32 x i8> %cttz
@@ -722,28 +738,24 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
 
 define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
 ; NOBMI-LABEL: 'var_cttz_v64i8'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v64i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
   %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0)
   ret <64 x i8> %cttz
@@ -751,28 +763,24 @@ define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
 
 define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) {
 ; NOBMI-LABEL: 'var_cttz_v64i8u'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; NOBMI-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; SSE2-LABEL: 'var_cttz_v64i8u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v64i8u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v64i8u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v64i8u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v64i8u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
 ;
   %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1)
   ret <64 x i8> %cttz

diff  --git a/llvm/test/Analysis/CostModel/X86/cttz.ll b/llvm/test/Analysis/CostModel/X86/cttz.ll
index 94d004e0fbc0..f7ae2a65cf4a 100644
--- a/llvm/test/Analysis/CostModel/X86/cttz.ll
+++ b/llvm/test/Analysis/CostModel/X86/cttz.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=-bmi,+sse2 | FileCheck %s -check-prefixes=NOBMI
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+sse2 | FileCheck %s -check-prefixes=BMI,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+sse4.2 | FileCheck %s -check-prefixes=BMI,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+sse2 | FileCheck %s -check-prefixes=BMI,SSE,SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+sse4.2 | FileCheck %s -check-prefixes=BMI,SSE,SSE42
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+avx | FileCheck %s -check-prefixes=BMI,AVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+avx2 | FileCheck %s -check-prefixes=BMI,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+avx512f | FileCheck %s -check-prefixes=BMI,AVX512,AVX512F
@@ -148,11 +148,11 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v2i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v2i64'
@@ -177,11 +177,11 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v2i64u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v2i64u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v2i64u'
@@ -206,19 +206,19 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
 ;
   %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
@@ -235,19 +235,19 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v4i64u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i64u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i64u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
 ;
 ; AVX512-LABEL: 'var_cttz_v4i64u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
 ;
   %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
@@ -264,32 +264,20 @@ define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
 ;
-; AVX512F-LABEL: 'var_cttz_v8i64'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
-;
-; AVX512BW-LABEL: 'var_cttz_v8i64'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
-;
-; AVX512VPOPCNT-LABEL: 'var_cttz_v8i64'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
-;
-; AVX512BITALG-LABEL: 'var_cttz_v8i64'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
+; AVX512-LABEL: 'var_cttz_v8i64'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
 ;
   %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0)
   ret <8 x i64> %cttz
@@ -305,32 +293,20 @@ define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i64u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i64u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i64u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
 ;
-; AVX512F-LABEL: 'var_cttz_v8i64u'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
-;
-; AVX512BW-LABEL: 'var_cttz_v8i64u'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
-;
-; AVX512VPOPCNT-LABEL: 'var_cttz_v8i64u'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
-;
-; AVX512BITALG-LABEL: 'var_cttz_v8i64u'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
+; AVX512-LABEL: 'var_cttz_v8i64u'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
 ;
   %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1)
   ret <8 x i64> %cttz
@@ -341,16 +317,12 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
 ;
-; SSE2-LABEL: 'var_cttz_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
-;
-; SSE42-LABEL: 'var_cttz_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
+; SSE-LABEL: 'var_cttz_v4i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i32'
@@ -370,16 +342,12 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
 ;
-; SSE2-LABEL: 'var_cttz_v4i32u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
-;
-; SSE42-LABEL: 'var_cttz_v4i32u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
+; SSE-LABEL: 'var_cttz_v4i32u'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v4i32u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v4i32u'
@@ -399,16 +367,12 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
 ;
-; SSE2-LABEL: 'var_cttz_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
-;
-; SSE42-LABEL: 'var_cttz_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
+; SSE-LABEL: 'var_cttz_v8i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i32'
@@ -428,16 +392,12 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
 ;
-; SSE2-LABEL: 'var_cttz_v8i32u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
-;
-; SSE42-LABEL: 'var_cttz_v8i32u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
+; SSE-LABEL: 'var_cttz_v8i32u'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i32u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i32u'
@@ -457,37 +417,21 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
 ;
-; SSE2-LABEL: 'var_cttz_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; SSE42-LABEL: 'var_cttz_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
+; SSE-LABEL: 'var_cttz_v16i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
 ;
-; AVX512F-LABEL: 'var_cttz_v16i32'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; AVX512BW-LABEL: 'var_cttz_v16i32'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; AVX512VPOPCNT-LABEL: 'var_cttz_v16i32'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; AVX512BITALG-LABEL: 'var_cttz_v16i32'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
+; AVX512-LABEL: 'var_cttz_v16i32'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
 ;
   %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0)
   ret <16 x i32> %cttz
@@ -498,37 +442,21 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; NOBMI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
 ;
-; SSE2-LABEL: 'var_cttz_v16i32u'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; SSE42-LABEL: 'var_cttz_v16i32u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
+; SSE-LABEL: 'var_cttz_v16i32u'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i32u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i32u'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
 ;
-; AVX512F-LABEL: 'var_cttz_v16i32u'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; AVX512BW-LABEL: 'var_cttz_v16i32u'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; AVX512VPOPCNT-LABEL: 'var_cttz_v16i32u'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; AVX512BITALG-LABEL: 'var_cttz_v16i32u'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
+; AVX512-LABEL: 'var_cttz_v16i32u'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
 ;
   %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1)
   ret <16 x i32> %cttz
@@ -544,20 +472,32 @@ define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
 ;
-; AVX512-LABEL: 'var_cttz_v8i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+; AVX512F-LABEL: 'var_cttz_v8i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v8i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v8i16'
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v8i16'
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
 ;
   %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
   ret <8 x i16> %cttz
@@ -573,20 +513,32 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v8i16u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v8i16u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v8i16u'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
 ;
-; AVX512-LABEL: 'var_cttz_v8i16u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+; AVX512F-LABEL: 'var_cttz_v8i16u'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v8i16u'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v8i16u'
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v8i16u'
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
 ;
   %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
   ret <8 x i16> %cttz
@@ -602,20 +554,32 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
 ;
-; AVX512-LABEL: 'var_cttz_v16i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+; AVX512F-LABEL: 'var_cttz_v16i16'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v16i16'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v16i16'
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v16i16'
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
 ;
   %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
   ret <16 x i16> %cttz
@@ -631,20 +595,32 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i16u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i16u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i16u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
 ;
-; AVX512-LABEL: 'var_cttz_v16i16u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+; AVX512F-LABEL: 'var_cttz_v16i16u'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v16i16u'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v16i16u'
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v16i16u'
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
 ;
   %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
   ret <16 x i16> %cttz
@@ -660,31 +636,31 @@ define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX512F-LABEL: 'var_cttz_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX512BW-LABEL: 'var_cttz_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX512VPOPCNT-LABEL: 'var_cttz_v32i16'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX512BITALG-LABEL: 'var_cttz_v32i16'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
   %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0)
@@ -701,31 +677,31 @@ define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i16u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i16u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i16u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX512F-LABEL: 'var_cttz_v32i16u'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX512BW-LABEL: 'var_cttz_v32i16u'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX512VPOPCNT-LABEL: 'var_cttz_v32i16u'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
 ; AVX512BITALG-LABEL: 'var_cttz_v32i16u'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
 ;
   %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1)
@@ -742,20 +718,32 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
 ;
-; AVX512-LABEL: 'var_cttz_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+; AVX512F-LABEL: 'var_cttz_v16i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v16i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v16i8'
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v16i8'
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
 ;
   %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
   ret <16 x i8> %cttz
@@ -771,20 +759,32 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v16i8u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v16i8u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v16i8u'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
 ;
-; AVX512-LABEL: 'var_cttz_v16i8u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+; AVX512F-LABEL: 'var_cttz_v16i8u'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v16i8u'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v16i8u'
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v16i8u'
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
 ;
   %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
   ret <16 x i8> %cttz
@@ -800,20 +800,32 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
 ;
-; AVX512-LABEL: 'var_cttz_v32i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+; AVX512F-LABEL: 'var_cttz_v32i8'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v32i8'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v32i8'
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v32i8'
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
 ;
   %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
   ret <32 x i8> %cttz
@@ -829,20 +841,32 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v32i8u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v32i8u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v32i8u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
 ;
-; AVX512-LABEL: 'var_cttz_v32i8u'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+; AVX512F-LABEL: 'var_cttz_v32i8u'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v32i8u'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v32i8u'
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v32i8u'
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
 ;
   %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
   ret <32 x i8> %cttz
@@ -858,31 +882,31 @@ define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX512F-LABEL: 'var_cttz_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX512BW-LABEL: 'var_cttz_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX512VPOPCNT-LABEL: 'var_cttz_v64i8'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX512BITALG-LABEL: 'var_cttz_v64i8'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
   %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0)
@@ -899,31 +923,31 @@ define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; SSE42-LABEL: 'var_cttz_v64i8u'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX1-LABEL: 'var_cttz_v64i8u'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX2-LABEL: 'var_cttz_v64i8u'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX512F-LABEL: 'var_cttz_v64i8u'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX512BW-LABEL: 'var_cttz_v64i8u'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX512VPOPCNT-LABEL: 'var_cttz_v64i8u'
-; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; AVX512VPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
 ; AVX512BITALG-LABEL: 'var_cttz_v64i8u'
-; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
 ; AVX512BITALG-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
 ;
   %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1)

diff  --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
index 3c3eebff1359..defc9490f0db 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
@@ -238,17 +238,17 @@ define void @cttz(i32 %a, <16 x i32> %va) {
 ;
 ; LATE-LABEL: 'cttz'
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; LATE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'cttz'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE_LATE-LABEL: 'cttz'
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)

diff  --git a/llvm/test/Analysis/CostModel/X86/scalarize.ll b/llvm/test/Analysis/CostModel/X86/scalarize.ll
index 0abecb45a893..ff24c9be4d8f 100644
--- a/llvm/test/Analysis/CostModel/X86/scalarize.ll
+++ b/llvm/test/Analysis/CostModel/X86/scalarize.ll
@@ -28,11 +28,11 @@ define void @test_scalarized_intrinsics() {
 ; CHECK64: cost of 1 {{.*}}bswap.v2i64
         %r3 = call %i8 @llvm.bswap.v2i64(%i8 undef)
 
-; CHECK32: cost of 14 {{.*}}cttz.v4i32
-; CHECK64: cost of 14 {{.*}}cttz.v4i32
+; CHECK32: cost of 11 {{.*}}cttz.v4i32
+; CHECK64: cost of 11 {{.*}}cttz.v4i32
         %r4 = call %i4 @llvm.cttz.v4i32(%i4 undef)
-; CHECK32: cost of 10 {{.*}}cttz.v2i64
-; CHECK64: cost of 10 {{.*}}cttz.v2i64
+; CHECK32: cost of 9 {{.*}}cttz.v2i64
+; CHECK64: cost of 9 {{.*}}cttz.v2i64
         %r5 = call %i8 @llvm.cttz.v2i64(%i8 undef)
 
 ; CHECK32: ret

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll b/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll
index c88e03b0a0f4..f6abe9bb22be 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=icelake-server -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
@@ -26,14 +26,38 @@ declare  i8 @llvm.cttz.i8(i8, i1)
 ;
 
 define void @cttz_2i64() #0 {
-; CHECK-LABEL: @cttz_2i64(
-; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
-; CHECK-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
-; CHECK-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; CHECK-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
-; CHECK-NEXT:    ret void
+; SSE-LABEL: @cttz_2i64(
+; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
+; SSE-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
+; SSE-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; SSE-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    ret void
+;
+; AVX1-LABEL: @cttz_2i64(
+; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; AVX1-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX1-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
+; AVX1-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
+; AVX1-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; AVX1-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX1-NEXT:    ret void
+;
+; AVX2-LABEL: @cttz_2i64(
+; AVX2-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; AVX2-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX2-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
+; AVX2-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
+; AVX2-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; AVX2-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX2-NEXT:    ret void
+;
+; AVX512-LABEL: @cttz_2i64(
+; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[TMP1]], i1 false)
+; AVX512-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; AVX512-NEXT:    ret void
 ;
   %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
   %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
@@ -103,26 +127,20 @@ define void @cttz_4i64() #0 {
 }
 
 define void @cttz_4i32() #0 {
-; SSE2-LABEL: @cttz_4i32(
-; SSE2-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
-; SSE2-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
-; SSE2-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
-; SSE2-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
-; SSE2-NEXT:    [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
-; SSE2-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
-; SSE2-NEXT:    ret void
-;
-; SSE42-LABEL: @cttz_4i32(
-; SSE42-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
-; SSE42-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 false)
-; SSE42-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
-; SSE42-NEXT:    ret void
+; SSE-LABEL: @cttz_4i32(
+; SSE-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+; SSE-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+; SSE-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+; SSE-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+; SSE-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
+; SSE-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
+; SSE-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
+; SSE-NEXT:    [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
+; SSE-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+; SSE-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+; SSE-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+; SSE-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @cttz_4i32(
 ; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
@@ -146,41 +164,32 @@ define void @cttz_4i32() #0 {
 }
 
 define void @cttz_8i32() #0 {
-; SSE2-LABEL: @cttz_8i32(
-; SSE2-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-; SSE2-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-; SSE2-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-; SSE2-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-; SSE2-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-; SSE2-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-; SSE2-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-; SSE2-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
-; SSE2-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
-; SSE2-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
-; SSE2-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
-; SSE2-NEXT:    [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
-; SSE2-NEXT:    [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 false)
-; SSE2-NEXT:    [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 false)
-; SSE2-NEXT:    [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 false)
-; SSE2-NEXT:    [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 false)
-; SSE2-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-; SSE2-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-; SSE2-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-; SSE2-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-; SSE2-NEXT:    store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-; SSE2-NEXT:    store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-; SSE2-NEXT:    store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-; SSE2-NEXT:    store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
-; SSE2-NEXT:    ret void
-;
-; SSE42-LABEL: @cttz_8i32(
-; SSE42-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2
-; SSE42-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 false)
-; SSE42-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2
-; SSE42-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2
-; SSE42-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP3]], i1 false)
-; SSE42-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2
-; SSE42-NEXT:    ret void
+; SSE-LABEL: @cttz_8i32(
+; SSE-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+; SSE-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+; SSE-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+; SSE-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+; SSE-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+; SSE-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+; SSE-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+; SSE-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+; SSE-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
+; SSE-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
+; SSE-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
+; SSE-NEXT:    [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
+; SSE-NEXT:    [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 false)
+; SSE-NEXT:    [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 false)
+; SSE-NEXT:    [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 false)
+; SSE-NEXT:    [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 false)
+; SSE-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+; SSE-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+; SSE-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+; SSE-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+; SSE-NEXT:    store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+; SSE-NEXT:    store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+; SSE-NEXT:    store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+; SSE-NEXT:    store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @cttz_8i32(
 ; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
@@ -494,14 +503,38 @@ define void @cttz_32i8() #0 {
 ;
 
 define void @cttz_undef_2i64() #0 {
-; CHECK-LABEL: @cttz_undef_2i64(
-; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
-; CHECK-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
-; CHECK-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; CHECK-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
-; CHECK-NEXT:    ret void
+; SSE-LABEL: @cttz_undef_2i64(
+; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
+; SSE-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
+; SSE-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; SSE-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    ret void
+;
+; AVX1-LABEL: @cttz_undef_2i64(
+; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; AVX1-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX1-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
+; AVX1-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
+; AVX1-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; AVX1-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX1-NEXT:    ret void
+;
+; AVX2-LABEL: @cttz_undef_2i64(
+; AVX2-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; AVX2-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX2-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
+; AVX2-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
+; AVX2-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; AVX2-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX2-NEXT:    ret void
+;
+; AVX512-LABEL: @cttz_undef_2i64(
+; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[TMP1]], i1 true)
+; AVX512-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; AVX512-NEXT:    ret void
 ;
   %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
   %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8


        


More information about the llvm-commits mailing list