[llvm] 2538add - [CostModel][X86] Add CostKinds handling for cttz
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 19 07:57:17 PDT 2022
Author: Simon Pilgrim
Date: 2022-09-19T15:57:03+01:00
New Revision: 2538adde5c89d1786973e4b30b574af3f228bc74
URL: https://github.com/llvm/llvm-project/commit/2538adde5c89d1786973e4b30b574af3f228bc74
DIFF: https://github.com/llvm/llvm-project/commit/2538adde5c89d1786973e4b30b574af3f228bc74.diff
LOG: [CostModel][X86] Add CostKinds handling for cttz
This was achieved using the 'cost-tables vs llvm-mca' script (see D103695).
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
llvm/test/Analysis/CostModel/X86/cttz-latency.ll
llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
llvm/test/Analysis/CostModel/X86/cttz.ll
llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
llvm/test/Analysis/CostModel/X86/scalarize.ll
llvm/test/Transforms/SLPVectorizer/X86/cttz.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 0679d10c1af0..7450cfa628f8 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3342,6 +3342,13 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::CTLZ, MVT::v4i32, { 1 } },
{ ISD::CTLZ, MVT::v8i16, { 3 } },
{ ISD::CTLZ, MVT::v16i8, { 2 } },
+
+ { ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
+ { ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
+ { ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
+ { ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
+ { ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
+ { ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
};
static const CostKindTblEntry AVX512BWCostTbl[] = {
{ ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
@@ -3369,10 +3376,12 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::CTPOP, MVT::v16i8, { 2, 4, 8, 8 } },
{ ISD::CTPOP, MVT::v32i8, { 2, 4, 8, 8 } },
{ ISD::CTPOP, MVT::v64i8, { 2, 5, 8, 10 } },
- { ISD::CTTZ, MVT::v8i64, { 10 } },
- { ISD::CTTZ, MVT::v16i32, { 14 } },
- { ISD::CTTZ, MVT::v32i16, { 12 } },
- { ISD::CTTZ, MVT::v64i8, { 9 } },
+ { ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
+ { ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
+ { ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
+ { ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
+ { ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
+ { ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
{ ISD::ROTL, MVT::v32i16, { 2 } },
{ ISD::ROTL, MVT::v16i16, { 2 } },
{ ISD::ROTL, MVT::v8i16, { 2 } },
@@ -3427,10 +3436,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
{ ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
{ ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
- { ISD::CTTZ, MVT::v8i64, { 20 } },
- { ISD::CTTZ, MVT::v16i32, { 28 } },
- { ISD::CTTZ, MVT::v32i16, { 24 } },
- { ISD::CTTZ, MVT::v64i8, { 18 } },
+ { ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
+ { ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
+ { ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
+ { ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
{ ISD::ROTL, MVT::v8i64, { 1 } },
{ ISD::ROTL, MVT::v4i64, { 1 } },
{ ISD::ROTL, MVT::v2i64, { 1 } },
@@ -3567,14 +3576,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
{ ISD::CTPOP, MVT::v16i8, { 2, 5, 8, 8 } },
{ ISD::CTPOP, MVT::v32i8, { 3, 5, 8, 12 } },
- { ISD::CTTZ, MVT::v2i64, { 4 } },
- { ISD::CTTZ, MVT::v4i64, { 4 } },
- { ISD::CTTZ, MVT::v4i32, { 7 } },
- { ISD::CTTZ, MVT::v8i32, { 7 } },
- { ISD::CTTZ, MVT::v8i16, { 4 } },
- { ISD::CTTZ, MVT::v16i16, { 4 } },
- { ISD::CTTZ, MVT::v16i8, { 3 } },
- { ISD::CTTZ, MVT::v32i8, { 3 } },
+ { ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
+ { ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
+ { ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
+ { ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
+ { ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
+ { ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
+ { ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
+ { ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
{ ISD::SADDSAT, MVT::v16i16, { 1 } },
{ ISD::SADDSAT, MVT::v32i8, { 1 } },
{ ISD::SMAX, MVT::v8i32, { 1 } },
@@ -3634,10 +3643,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
{ ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
- { ISD::CTTZ, MVT::v4i64, { 22 } }, // 2 x 128-bit Op + extract/insert
- { ISD::CTTZ, MVT::v8i32, { 30 } }, // 2 x 128-bit Op + extract/insert
- { ISD::CTTZ, MVT::v16i16, { 26 } }, // 2 x 128-bit Op + extract/insert
- { ISD::CTTZ, MVT::v32i8, { 20 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
+ { ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
+ { ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
+ { ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
{ ISD::SADDSAT, MVT::v16i16, { 4 } }, // 2 x 128-bit Op + extract/insert
{ ISD::SADDSAT, MVT::v32i8, { 4 } }, // 2 x 128-bit Op + extract/insert
{ ISD::SMAX, MVT::v8i32, { 4 } }, // 2 x 128-bit Op + extract/insert
@@ -3721,10 +3734,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
{ ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
{ ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
- { ISD::CTTZ, MVT::v2i64, { 10 } },
- { ISD::CTTZ, MVT::v4i32, { 14 } },
- { ISD::CTTZ, MVT::v8i16, { 12 } },
- { ISD::CTTZ, MVT::v16i8, { 9 } }
+ { ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
+ { ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
+ { ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
+ { ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
};
static const CostKindTblEntry SSE2CostTbl[] = {
{ ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
@@ -3746,10 +3759,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
{ ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
{ ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
- { ISD::CTTZ, MVT::v2i64, { 14 } },
- { ISD::CTTZ, MVT::v4i32, { 18 } },
- { ISD::CTTZ, MVT::v8i16, { 16 } },
- { ISD::CTTZ, MVT::v16i8, { 13 } },
+ { ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
+ { ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
+ { ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
+ { ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
{ ISD::SADDSAT, MVT::v8i16, { 1 } },
{ ISD::SADDSAT, MVT::v16i8, { 1 } },
{ ISD::SMAX, MVT::v8i16, { 1 } },
diff --git a/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll b/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
index e83dcae0d939..c4b58a4b4f69 100644
--- a/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
@@ -124,27 +124,27 @@ declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1)
define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
; NOBMI-LABEL: 'var_cttz_v2i64'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; SSE2-LABEL: 'var_cttz_v2i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v2i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v2i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v2i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v2i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
@@ -153,27 +153,27 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
; NOBMI-LABEL: 'var_cttz_v2i64u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; SSE2-LABEL: 'var_cttz_v2i64u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v2i64u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v2i64u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v2i64u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v2i64u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
@@ -182,27 +182,27 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
; NOBMI-LABEL: 'var_cttz_v4i64'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; SSE2-LABEL: 'var_cttz_v4i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
@@ -211,27 +211,27 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
; NOBMI-LABEL: 'var_cttz_v4i64u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; SSE2-LABEL: 'var_cttz_v4i64u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i64u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i64u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i64u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i64u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
@@ -240,27 +240,27 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
; NOBMI-LABEL: 'var_cttz_v8i64'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; SSE2-LABEL: 'var_cttz_v8i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v8i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
%cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0)
@@ -269,27 +269,27 @@ define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
; NOBMI-LABEL: 'var_cttz_v8i64u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; SSE2-LABEL: 'var_cttz_v8i64u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i64u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i64u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i64u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v8i64u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
%cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1)
@@ -298,27 +298,27 @@ define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
; NOBMI-LABEL: 'var_cttz_v4i32'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; SSE2-LABEL: 'var_cttz_v4i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
@@ -327,27 +327,27 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
; NOBMI-LABEL: 'var_cttz_v4i32u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; SSE2-LABEL: 'var_cttz_v4i32u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i32u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i32u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i32u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i32u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
@@ -356,27 +356,27 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
; NOBMI-LABEL: 'var_cttz_v8i32'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; SSE2-LABEL: 'var_cttz_v8i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v8i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
@@ -385,27 +385,27 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
; NOBMI-LABEL: 'var_cttz_v8i32u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; SSE2-LABEL: 'var_cttz_v8i32u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i32u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i32u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i32u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v8i32u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
@@ -414,27 +414,27 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
; NOBMI-LABEL: 'var_cttz_v16i32'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; SSE2-LABEL: 'var_cttz_v16i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
%cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0)
@@ -443,27 +443,27 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
; NOBMI-LABEL: 'var_cttz_v16i32u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; SSE2-LABEL: 'var_cttz_v16i32u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i32u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i32u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i32u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i32u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
%cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1)
@@ -471,18 +471,58 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
}
define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
-; CHECK-LABEL: 'var_cttz_v8i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+; NOBMI-LABEL: 'var_cttz_v8i16'
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE2-LABEL: 'var_cttz_v8i16'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE42-LABEL: 'var_cttz_v8i16'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX1-LABEL: 'var_cttz_v8i16'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX2-LABEL: 'var_cttz_v8i16'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX512-LABEL: 'var_cttz_v8i16'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
;
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
ret <8 x i16> %cttz
}
define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
-; CHECK-LABEL: 'var_cttz_v8i16u'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+; NOBMI-LABEL: 'var_cttz_v8i16u'
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE2-LABEL: 'var_cttz_v8i16u'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE42-LABEL: 'var_cttz_v8i16u'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX1-LABEL: 'var_cttz_v8i16u'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX2-LABEL: 'var_cttz_v8i16u'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX512-LABEL: 'var_cttz_v8i16u'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
;
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
ret <8 x i16> %cttz
@@ -490,27 +530,27 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
; NOBMI-LABEL: 'var_cttz_v16i16'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; SSE2-LABEL: 'var_cttz_v16i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
@@ -519,27 +559,27 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
; NOBMI-LABEL: 'var_cttz_v16i16u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; SSE2-LABEL: 'var_cttz_v16i16u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i16u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i16u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i16u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i16u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
@@ -548,28 +588,24 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
; NOBMI-LABEL: 'var_cttz_v32i16'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; SSE2-LABEL: 'var_cttz_v32i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
%cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0)
ret <32 x i16> %cttz
@@ -577,28 +613,24 @@ define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
; NOBMI-LABEL: 'var_cttz_v32i16u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; SSE2-LABEL: 'var_cttz_v32i16u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i16u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i16u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i16u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i16u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
%cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1)
ret <32 x i16> %cttz
@@ -606,27 +638,27 @@ define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
; NOBMI-LABEL: 'var_cttz_v16i8'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; SSE2-LABEL: 'var_cttz_v16i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
@@ -635,27 +667,27 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
; NOBMI-LABEL: 'var_cttz_v16i8u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; SSE2-LABEL: 'var_cttz_v16i8u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i8u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i8u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i8u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i8u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
@@ -664,27 +696,27 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
; NOBMI-LABEL: 'var_cttz_v32i8'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; SSE2-LABEL: 'var_cttz_v32i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX512-LABEL: 'var_cttz_v32i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
@@ -693,27 +725,27 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
; NOBMI-LABEL: 'var_cttz_v32i8u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; SSE2-LABEL: 'var_cttz_v32i8u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i8u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i8u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i8u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX512-LABEL: 'var_cttz_v32i8u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
@@ -722,28 +754,24 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
; NOBMI-LABEL: 'var_cttz_v64i8'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; SSE2-LABEL: 'var_cttz_v64i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v64i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v64i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v64i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v64i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
%cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0)
ret <64 x i8> %cttz
@@ -751,28 +779,24 @@ define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) {
; NOBMI-LABEL: 'var_cttz_v64i8u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; SSE2-LABEL: 'var_cttz_v64i8u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v64i8u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v64i8u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v64i8u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v64i8u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
%cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1)
ret <64 x i8> %cttz
diff --git a/llvm/test/Analysis/CostModel/X86/cttz-latency.ll b/llvm/test/Analysis/CostModel/X86/cttz-latency.ll
index 06ec07a2eb9b..ca024827c141 100644
--- a/llvm/test/Analysis/CostModel/X86/cttz-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/cttz-latency.ll
@@ -123,24 +123,24 @@ declare <32 x i16> @llvm.cttz.v32i16(<32 x i16>, i1)
declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1)
define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
-; NOBMI-LABEL: 'var_cttz_v2i64'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
+; SSE2-LABEL: 'var_cttz_v2i64'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v2i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v2i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v2i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v2i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
@@ -148,24 +148,24 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
}
define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
-; NOBMI-LABEL: 'var_cttz_v2i64u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
+; SSE2-LABEL: 'var_cttz_v2i64u'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v2i64u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v2i64u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v2i64u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v2i64u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
@@ -173,24 +173,24 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
}
define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
-; NOBMI-LABEL: 'var_cttz_v4i64'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
+; SSE2-LABEL: 'var_cttz_v4i64'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
@@ -198,24 +198,24 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
}
define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
-; NOBMI-LABEL: 'var_cttz_v4i64u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
+; SSE2-LABEL: 'var_cttz_v4i64u'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i64u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i64u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i64u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i64u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
@@ -223,24 +223,24 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
}
define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
-; NOBMI-LABEL: 'var_cttz_v8i64'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
+; SSE2-LABEL: 'var_cttz_v8i64'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v8i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
%cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0)
@@ -248,24 +248,24 @@ define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
}
define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
-; NOBMI-LABEL: 'var_cttz_v8i64u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
+; SSE2-LABEL: 'var_cttz_v8i64u'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i64u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i64u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i64u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v8i64u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
%cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1)
@@ -273,24 +273,24 @@ define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
}
define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
-; NOBMI-LABEL: 'var_cttz_v4i32'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
+; SSE2-LABEL: 'var_cttz_v4i32'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
@@ -298,24 +298,24 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
}
define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
-; NOBMI-LABEL: 'var_cttz_v4i32u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
+; SSE2-LABEL: 'var_cttz_v4i32u'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i32u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i32u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i32u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i32u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
@@ -323,12 +323,12 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
}
define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
-; NOBMI-LABEL: 'var_cttz_v8i32'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
+; SSE2-LABEL: 'var_cttz_v8i32'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i32'
@@ -336,11 +336,11 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v8i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
@@ -348,12 +348,12 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
}
define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
-; NOBMI-LABEL: 'var_cttz_v8i32u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
+; SSE2-LABEL: 'var_cttz_v8i32u'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i32u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i32u'
@@ -361,11 +361,11 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i32u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v8i32u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
@@ -373,12 +373,12 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
}
define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
-; NOBMI-LABEL: 'var_cttz_v16i32'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
+; SSE2-LABEL: 'var_cttz_v16i32'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i32'
@@ -386,11 +386,11 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
%cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0)
@@ -398,12 +398,12 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
}
define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
-; NOBMI-LABEL: 'var_cttz_v16i32u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
+; SSE2-LABEL: 'var_cttz_v16i32u'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i32u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i32u'
@@ -411,11 +411,11 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i32u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i32u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
%cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1)
@@ -423,18 +423,50 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
}
define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
-; CHECK-LABEL: 'var_cttz_v8i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+; SSE2-LABEL: 'var_cttz_v8i16'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE42-LABEL: 'var_cttz_v8i16'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX1-LABEL: 'var_cttz_v8i16'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX2-LABEL: 'var_cttz_v8i16'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX512-LABEL: 'var_cttz_v8i16'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
;
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
ret <8 x i16> %cttz
}
define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
-; CHECK-LABEL: 'var_cttz_v8i16u'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+; SSE2-LABEL: 'var_cttz_v8i16u'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE42-LABEL: 'var_cttz_v8i16u'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX1-LABEL: 'var_cttz_v8i16u'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX2-LABEL: 'var_cttz_v8i16u'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX512-LABEL: 'var_cttz_v8i16u'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
;
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
ret <8 x i16> %cttz
@@ -442,23 +474,23 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
; SSE2-LABEL: 'var_cttz_v16i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
@@ -467,23 +499,23 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
; SSE2-LABEL: 'var_cttz_v16i16u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i16u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i16u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i16u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i16u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
@@ -492,24 +524,20 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
; SSE2-LABEL: 'var_cttz_v32i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
%cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0)
ret <32 x i16> %cttz
@@ -517,24 +545,20 @@ define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
; SSE2-LABEL: 'var_cttz_v32i16u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i16u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i16u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i16u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i16u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
%cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1)
ret <32 x i16> %cttz
@@ -542,24 +566,20 @@ define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
; SSE2-LABEL: 'var_cttz_v16i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v16i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
ret <16 x i8> %cttz
@@ -567,24 +587,20 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
; SSE2-LABEL: 'var_cttz_v16i8u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i8u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i8u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i8u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v16i8u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
ret <16 x i8> %cttz
@@ -592,24 +608,20 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
; SSE2-LABEL: 'var_cttz_v32i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
ret <32 x i8> %cttz
@@ -617,24 +629,20 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
; SSE2-LABEL: 'var_cttz_v32i8u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i8u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i8u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i8u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i8u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
ret <32 x i8> %cttz
@@ -642,24 +650,20 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
; SSE2-LABEL: 'var_cttz_v64i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v64i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v64i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v64i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v64i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
%cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0)
ret <64 x i8> %cttz
@@ -667,24 +671,20 @@ define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) {
; SSE2-LABEL: 'var_cttz_v64i8u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v64i8u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v64i8u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v64i8u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v64i8u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
%cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1)
ret <64 x i8> %cttz
diff --git a/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
index 160bd45f53d0..f6f77c56c5c8 100644
--- a/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
@@ -124,27 +124,27 @@ declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1)
define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
; NOBMI-LABEL: 'var_cttz_v2i64'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; SSE2-LABEL: 'var_cttz_v2i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v2i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v2i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v2i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v2i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
@@ -153,27 +153,27 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
; NOBMI-LABEL: 'var_cttz_v2i64u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; SSE2-LABEL: 'var_cttz_v2i64u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v2i64u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v2i64u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v2i64u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v2i64u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz
;
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
@@ -182,27 +182,27 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
; NOBMI-LABEL: 'var_cttz_v4i64'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; SSE2-LABEL: 'var_cttz_v4i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
@@ -211,27 +211,27 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
; NOBMI-LABEL: 'var_cttz_v4i64u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; SSE2-LABEL: 'var_cttz_v4i64u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i64u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i64u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i64u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i64u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz
;
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
@@ -240,27 +240,27 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
; NOBMI-LABEL: 'var_cttz_v8i64'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; SSE2-LABEL: 'var_cttz_v8i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v8i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
%cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0)
@@ -269,27 +269,27 @@ define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
; NOBMI-LABEL: 'var_cttz_v8i64u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; SSE2-LABEL: 'var_cttz_v8i64u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i64u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i64u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i64u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v8i64u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz
;
%cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1)
@@ -298,27 +298,27 @@ define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
; NOBMI-LABEL: 'var_cttz_v4i32'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; SSE2-LABEL: 'var_cttz_v4i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
@@ -327,27 +327,27 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
; NOBMI-LABEL: 'var_cttz_v4i32u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; SSE2-LABEL: 'var_cttz_v4i32u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i32u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i32u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i32u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i32u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz
;
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
@@ -356,27 +356,27 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
; NOBMI-LABEL: 'var_cttz_v8i32'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; SSE2-LABEL: 'var_cttz_v8i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v8i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
@@ -385,27 +385,27 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
; NOBMI-LABEL: 'var_cttz_v8i32u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; SSE2-LABEL: 'var_cttz_v8i32u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i32u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i32u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i32u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v8i32u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz
;
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
@@ -414,27 +414,27 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
; NOBMI-LABEL: 'var_cttz_v16i32'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; SSE2-LABEL: 'var_cttz_v16i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
%cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0)
@@ -443,27 +443,27 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
; NOBMI-LABEL: 'var_cttz_v16i32u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; SSE2-LABEL: 'var_cttz_v16i32u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i32u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i32u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i32u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i32u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz
;
%cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1)
@@ -471,18 +471,58 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
}
define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
-; CHECK-LABEL: 'var_cttz_v8i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+; NOBMI-LABEL: 'var_cttz_v8i16'
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE2-LABEL: 'var_cttz_v8i16'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE42-LABEL: 'var_cttz_v8i16'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX1-LABEL: 'var_cttz_v8i16'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX2-LABEL: 'var_cttz_v8i16'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX512-LABEL: 'var_cttz_v8i16'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
;
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
ret <8 x i16> %cttz
}
define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
-; CHECK-LABEL: 'var_cttz_v8i16u'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+; NOBMI-LABEL: 'var_cttz_v8i16u'
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE2-LABEL: 'var_cttz_v8i16u'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; SSE42-LABEL: 'var_cttz_v8i16u'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX1-LABEL: 'var_cttz_v8i16u'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX2-LABEL: 'var_cttz_v8i16u'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
+;
+; AVX512-LABEL: 'var_cttz_v8i16u'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz
;
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
ret <8 x i16> %cttz
@@ -490,28 +530,24 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
; NOBMI-LABEL: 'var_cttz_v16i16'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; SSE2-LABEL: 'var_cttz_v16i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v16i16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
ret <16 x i16> %cttz
@@ -519,28 +555,24 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
; NOBMI-LABEL: 'var_cttz_v16i16u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; SSE2-LABEL: 'var_cttz_v16i16u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i16u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i16u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i16u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v16i16u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz
;
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
ret <16 x i16> %cttz
@@ -548,28 +580,24 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
; NOBMI-LABEL: 'var_cttz_v32i16'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; SSE2-LABEL: 'var_cttz_v32i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
%cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0)
ret <32 x i16> %cttz
@@ -577,28 +605,24 @@ define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
; NOBMI-LABEL: 'var_cttz_v32i16u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; SSE2-LABEL: 'var_cttz_v32i16u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i16u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i16u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i16u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i16u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz
;
%cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1)
ret <32 x i16> %cttz
@@ -606,27 +630,27 @@ define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
; NOBMI-LABEL: 'var_cttz_v16i8'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; SSE2-LABEL: 'var_cttz_v16i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
@@ -635,27 +659,27 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
; NOBMI-LABEL: 'var_cttz_v16i8u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; SSE2-LABEL: 'var_cttz_v16i8u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i8u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i8u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i8u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
; AVX512-LABEL: 'var_cttz_v16i8u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz
;
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
@@ -664,28 +688,24 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
; NOBMI-LABEL: 'var_cttz_v32i8'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; SSE2-LABEL: 'var_cttz_v32i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
ret <32 x i8> %cttz
@@ -693,28 +713,24 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
; NOBMI-LABEL: 'var_cttz_v32i8u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; SSE2-LABEL: 'var_cttz_v32i8u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i8u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i8u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i8u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v32i8u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz
;
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
ret <32 x i8> %cttz
@@ -722,28 +738,24 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
; NOBMI-LABEL: 'var_cttz_v64i8'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; SSE2-LABEL: 'var_cttz_v64i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v64i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v64i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v64i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v64i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
%cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0)
ret <64 x i8> %cttz
@@ -751,28 +763,24 @@ define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) {
; NOBMI-LABEL: 'var_cttz_v64i8u'
-; NOBMI-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; NOBMI-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; SSE2-LABEL: 'var_cttz_v64i8u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v64i8u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v64i8u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v64i8u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
-;
-; AVX512-LABEL: 'var_cttz_v64i8u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz
;
%cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1)
ret <64 x i8> %cttz
diff --git a/llvm/test/Analysis/CostModel/X86/cttz.ll b/llvm/test/Analysis/CostModel/X86/cttz.ll
index 94d004e0fbc0..f7ae2a65cf4a 100644
--- a/llvm/test/Analysis/CostModel/X86/cttz.ll
+++ b/llvm/test/Analysis/CostModel/X86/cttz.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=-bmi,+sse2 | FileCheck %s -check-prefixes=NOBMI
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+sse2 | FileCheck %s -check-prefixes=BMI,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+sse4.2 | FileCheck %s -check-prefixes=BMI,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+sse2 | FileCheck %s -check-prefixes=BMI,SSE,SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+sse4.2 | FileCheck %s -check-prefixes=BMI,SSE,SSE42
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+avx | FileCheck %s -check-prefixes=BMI,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+avx2 | FileCheck %s -check-prefixes=BMI,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+avx512f | FileCheck %s -check-prefixes=BMI,AVX512,AVX512F
@@ -148,11 +148,11 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v2i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v2i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v2i64'
@@ -177,11 +177,11 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v2i64u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v2i64u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v2i64u'
@@ -206,19 +206,19 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
;
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
@@ -235,19 +235,19 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v4i64u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i64u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i64u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
;
; AVX512-LABEL: 'var_cttz_v4i64u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
;
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
@@ -264,32 +264,20 @@ define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
;
-; AVX512F-LABEL: 'var_cttz_v8i64'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
-;
-; AVX512BW-LABEL: 'var_cttz_v8i64'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
-;
-; AVX512VPOPCNT-LABEL: 'var_cttz_v8i64'
-; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
-; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
-;
-; AVX512BITALG-LABEL: 'var_cttz_v8i64'
-; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
-; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
+; AVX512-LABEL: 'var_cttz_v8i64'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
;
%cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0)
ret <8 x i64> %cttz
@@ -305,32 +293,20 @@ define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i64u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i64u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i64u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
;
-; AVX512F-LABEL: 'var_cttz_v8i64u'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
-;
-; AVX512BW-LABEL: 'var_cttz_v8i64u'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
-;
-; AVX512VPOPCNT-LABEL: 'var_cttz_v8i64u'
-; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
-; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
-;
-; AVX512BITALG-LABEL: 'var_cttz_v8i64u'
-; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
-; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
+; AVX512-LABEL: 'var_cttz_v8i64u'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
;
%cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1)
ret <8 x i64> %cttz
@@ -341,16 +317,12 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
; NOBMI-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
;
-; SSE2-LABEL: 'var_cttz_v4i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
-;
-; SSE42-LABEL: 'var_cttz_v4i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
+; SSE-LABEL: 'var_cttz_v4i32'
+; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i32'
@@ -370,16 +342,12 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
; NOBMI-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
;
-; SSE2-LABEL: 'var_cttz_v4i32u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
-;
-; SSE42-LABEL: 'var_cttz_v4i32u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
+; SSE-LABEL: 'var_cttz_v4i32u'
+; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v4i32u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v4i32u'
@@ -399,16 +367,12 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
; NOBMI-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
;
-; SSE2-LABEL: 'var_cttz_v8i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
-;
-; SSE42-LABEL: 'var_cttz_v8i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
+; SSE-LABEL: 'var_cttz_v8i32'
+; SSE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i32'
@@ -428,16 +392,12 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
; NOBMI-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
;
-; SSE2-LABEL: 'var_cttz_v8i32u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
-;
-; SSE42-LABEL: 'var_cttz_v8i32u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
+; SSE-LABEL: 'var_cttz_v8i32u'
+; SSE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i32u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i32u'
@@ -457,37 +417,21 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
; NOBMI-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
;
-; SSE2-LABEL: 'var_cttz_v16i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; SSE42-LABEL: 'var_cttz_v16i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
+; SSE-LABEL: 'var_cttz_v16i32'
+; SSE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
;
-; AVX512F-LABEL: 'var_cttz_v16i32'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; AVX512BW-LABEL: 'var_cttz_v16i32'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; AVX512VPOPCNT-LABEL: 'var_cttz_v16i32'
-; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; AVX512BITALG-LABEL: 'var_cttz_v16i32'
-; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
-; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
+; AVX512-LABEL: 'var_cttz_v16i32'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
;
%cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0)
ret <16 x i32> %cttz
@@ -498,37 +442,21 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
; NOBMI-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
;
-; SSE2-LABEL: 'var_cttz_v16i32u'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; SSE42-LABEL: 'var_cttz_v16i32u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
+; SSE-LABEL: 'var_cttz_v16i32u'
+; SSE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i32u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i32u'
; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
;
-; AVX512F-LABEL: 'var_cttz_v16i32u'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; AVX512BW-LABEL: 'var_cttz_v16i32u'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; AVX512VPOPCNT-LABEL: 'var_cttz_v16i32u'
-; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
-;
-; AVX512BITALG-LABEL: 'var_cttz_v16i32u'
-; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
-; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
+; AVX512-LABEL: 'var_cttz_v16i32u'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
;
%cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1)
ret <16 x i32> %cttz
@@ -544,20 +472,32 @@ define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i16'
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
;
-; AVX512-LABEL: 'var_cttz_v8i16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+; AVX512F-LABEL: 'var_cttz_v8i16'
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v8i16'
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v8i16'
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v8i16'
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
;
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
ret <8 x i16> %cttz
@@ -573,20 +513,32 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v8i16u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v8i16u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v8i16u'
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
;
-; AVX512-LABEL: 'var_cttz_v8i16u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+; AVX512F-LABEL: 'var_cttz_v8i16u'
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v8i16u'
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v8i16u'
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v8i16u'
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
;
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
ret <8 x i16> %cttz
@@ -602,20 +554,32 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
;
-; AVX512-LABEL: 'var_cttz_v16i16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+; AVX512F-LABEL: 'var_cttz_v16i16'
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v16i16'
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v16i16'
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v16i16'
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
;
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
ret <16 x i16> %cttz
@@ -631,20 +595,32 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i16u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i16u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i16u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
;
-; AVX512-LABEL: 'var_cttz_v16i16u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+; AVX512F-LABEL: 'var_cttz_v16i16u'
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v16i16u'
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v16i16u'
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v16i16u'
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
;
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
ret <16 x i16> %cttz
@@ -660,31 +636,31 @@ define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; AVX512F-LABEL: 'var_cttz_v32i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; AVX512BW-LABEL: 'var_cttz_v32i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; AVX512VPOPCNT-LABEL: 'var_cttz_v32i16'
-; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; AVX512BITALG-LABEL: 'var_cttz_v32i16'
-; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
%cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0)
@@ -701,31 +677,31 @@ define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i16u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i16u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i16u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; AVX512F-LABEL: 'var_cttz_v32i16u'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; AVX512BW-LABEL: 'var_cttz_v32i16u'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; AVX512VPOPCNT-LABEL: 'var_cttz_v32i16u'
-; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
; AVX512BITALG-LABEL: 'var_cttz_v32i16u'
-; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
;
%cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1)
@@ -742,20 +718,32 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i8'
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
;
-; AVX512-LABEL: 'var_cttz_v16i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+; AVX512F-LABEL: 'var_cttz_v16i8'
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v16i8'
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v16i8'
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v16i8'
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
;
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
ret <16 x i8> %cttz
@@ -771,20 +759,32 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v16i8u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v16i8u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v16i8u'
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
;
-; AVX512-LABEL: 'var_cttz_v16i8u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+; AVX512F-LABEL: 'var_cttz_v16i8u'
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v16i8u'
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v16i8u'
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v16i8u'
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
;
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
ret <16 x i8> %cttz
@@ -800,20 +800,32 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
;
-; AVX512-LABEL: 'var_cttz_v32i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+; AVX512F-LABEL: 'var_cttz_v32i8'
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v32i8'
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v32i8'
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v32i8'
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
;
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
ret <32 x i8> %cttz
@@ -829,20 +841,32 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v32i8u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v32i8u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v32i8u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
;
-; AVX512-LABEL: 'var_cttz_v32i8u'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+; AVX512F-LABEL: 'var_cttz_v32i8u'
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+;
+; AVX512BW-LABEL: 'var_cttz_v32i8u'
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+;
+; AVX512VPOPCNT-LABEL: 'var_cttz_v32i8u'
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
+;
+; AVX512BITALG-LABEL: 'var_cttz_v32i8u'
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
;
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
ret <32 x i8> %cttz
@@ -858,31 +882,31 @@ define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v64i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v64i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v64i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; AVX512F-LABEL: 'var_cttz_v64i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; AVX512BW-LABEL: 'var_cttz_v64i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; AVX512VPOPCNT-LABEL: 'var_cttz_v64i8'
-; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; AVX512BITALG-LABEL: 'var_cttz_v64i8'
-; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
%cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0)
@@ -899,31 +923,31 @@ define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; SSE42-LABEL: 'var_cttz_v64i8u'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; AVX1-LABEL: 'var_cttz_v64i8u'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; AVX2-LABEL: 'var_cttz_v64i8u'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; AVX512F-LABEL: 'var_cttz_v64i8u'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; AVX512BW-LABEL: 'var_cttz_v64i8u'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; AVX512VPOPCNT-LABEL: 'var_cttz_v64i8u'
-; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
; AVX512BITALG-LABEL: 'var_cttz_v64i8u'
-; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
+; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
;
%cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1)
diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
index 3c3eebff1359..defc9490f0db 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
@@ -238,17 +238,17 @@ define void @cttz(i32 %a, <16 x i32> %va) {
;
; LATE-LABEL: 'cttz'
; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
+; LATE-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'cttz'
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'cttz'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
diff --git a/llvm/test/Analysis/CostModel/X86/scalarize.ll b/llvm/test/Analysis/CostModel/X86/scalarize.ll
index 0abecb45a893..ff24c9be4d8f 100644
--- a/llvm/test/Analysis/CostModel/X86/scalarize.ll
+++ b/llvm/test/Analysis/CostModel/X86/scalarize.ll
@@ -28,11 +28,11 @@ define void @test_scalarized_intrinsics() {
; CHECK64: cost of 1 {{.*}}bswap.v2i64
%r3 = call %i8 @llvm.bswap.v2i64(%i8 undef)
-; CHECK32: cost of 14 {{.*}}cttz.v4i32
-; CHECK64: cost of 14 {{.*}}cttz.v4i32
+; CHECK32: cost of 11 {{.*}}cttz.v4i32
+; CHECK64: cost of 11 {{.*}}cttz.v4i32
%r4 = call %i4 @llvm.cttz.v4i32(%i4 undef)
-; CHECK32: cost of 10 {{.*}}cttz.v2i64
-; CHECK64: cost of 10 {{.*}}cttz.v2i64
+; CHECK32: cost of 9 {{.*}}cttz.v2i64
+; CHECK64: cost of 9 {{.*}}cttz.v2i64
%r5 = call %i8 @llvm.cttz.v2i64(%i8 undef)
; CHECK32: ret
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll b/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll
index c88e03b0a0f4..f6abe9bb22be 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=icelake-server -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
@@ -26,14 +26,38 @@ declare i8 @llvm.cttz.i8(i8, i1)
;
define void @cttz_2i64() #0 {
-; CHECK-LABEL: @cttz_2i64(
-; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
-; CHECK-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
-; CHECK-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; CHECK-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
-; CHECK-NEXT: ret void
+; SSE-LABEL: @cttz_2i64(
+; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
+; SSE-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
+; SSE-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; SSE-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT: ret void
+;
+; AVX1-LABEL: @cttz_2i64(
+; AVX1-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; AVX1-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX1-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
+; AVX1-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
+; AVX1-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; AVX1-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX1-NEXT: ret void
+;
+; AVX2-LABEL: @cttz_2i64(
+; AVX2-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; AVX2-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX2-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
+; AVX2-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
+; AVX2-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; AVX2-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX2-NEXT: ret void
+;
+; AVX512-LABEL: @cttz_2i64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[TMP1]], i1 false)
+; AVX512-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; AVX512-NEXT: ret void
;
%ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
%ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
@@ -103,26 +127,20 @@ define void @cttz_4i64() #0 {
}
define void @cttz_4i32() #0 {
-; SSE2-LABEL: @cttz_4i32(
-; SSE2-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
-; SSE2-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
-; SSE2-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
-; SSE2-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
-; SSE2-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
-; SSE2-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
-; SSE2-NEXT: ret void
-;
-; SSE42-LABEL: @cttz_4i32(
-; SSE42-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
-; SSE42-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 false)
-; SSE42-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
-; SSE42-NEXT: ret void
+; SSE-LABEL: @cttz_4i32(
+; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+; SSE-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
+; SSE-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
+; SSE-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
+; SSE-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
+; SSE-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+; SSE-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+; SSE-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+; SSE-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+; SSE-NEXT: ret void
;
; AVX-LABEL: @cttz_4i32(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
@@ -146,41 +164,32 @@ define void @cttz_4i32() #0 {
}
define void @cttz_8i32() #0 {
-; SSE2-LABEL: @cttz_8i32(
-; SSE2-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-; SSE2-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-; SSE2-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-; SSE2-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-; SSE2-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-; SSE2-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-; SSE2-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-; SSE2-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
-; SSE2-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
-; SSE2-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
-; SSE2-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
-; SSE2-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
-; SSE2-NEXT: [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 false)
-; SSE2-NEXT: [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 false)
-; SSE2-NEXT: [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 false)
-; SSE2-NEXT: [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 false)
-; SSE2-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-; SSE2-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-; SSE2-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-; SSE2-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-; SSE2-NEXT: store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-; SSE2-NEXT: store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-; SSE2-NEXT: store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-; SSE2-NEXT: store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
-; SSE2-NEXT: ret void
-;
-; SSE42-LABEL: @cttz_8i32(
-; SSE42-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2
-; SSE42-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 false)
-; SSE42-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2
-; SSE42-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2
-; SSE42-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP3]], i1 false)
-; SSE42-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2
-; SSE42-NEXT: ret void
+; SSE-LABEL: @cttz_8i32(
+; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+; SSE-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+; SSE-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+; SSE-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+; SSE-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+; SSE-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
+; SSE-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
+; SSE-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
+; SSE-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
+; SSE-NEXT: [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 false)
+; SSE-NEXT: [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 false)
+; SSE-NEXT: [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 false)
+; SSE-NEXT: [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 false)
+; SSE-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+; SSE-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+; SSE-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+; SSE-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+; SSE-NEXT: store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+; SSE-NEXT: store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+; SSE-NEXT: store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+; SSE-NEXT: store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+; SSE-NEXT: ret void
;
; AVX-LABEL: @cttz_8i32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
@@ -494,14 +503,38 @@ define void @cttz_32i8() #0 {
;
define void @cttz_undef_2i64() #0 {
-; CHECK-LABEL: @cttz_undef_2i64(
-; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
-; CHECK-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
-; CHECK-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; CHECK-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
-; CHECK-NEXT: ret void
+; SSE-LABEL: @cttz_undef_2i64(
+; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
+; SSE-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
+; SSE-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; SSE-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT: ret void
+;
+; AVX1-LABEL: @cttz_undef_2i64(
+; AVX1-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; AVX1-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX1-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
+; AVX1-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
+; AVX1-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; AVX1-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX1-NEXT: ret void
+;
+; AVX2-LABEL: @cttz_undef_2i64(
+; AVX2-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; AVX2-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX2-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
+; AVX2-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
+; AVX2-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; AVX2-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX2-NEXT: ret void
+;
+; AVX512-LABEL: @cttz_undef_2i64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[TMP1]], i1 true)
+; AVX512-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; AVX512-NEXT: ret void
;
%ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
%ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
More information about the llvm-commits mailing list