[llvm] 10edf88 - [CostModel][X86] Update CTPOP costs

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 10 09:57:46 PDT 2022


Author: Simon Pilgrim
Date: 2022-09-10T17:57:20+01:00
New Revision: 10edf88458383280fb2e08cb94489ef8f6ce465f

URL: https://github.com/llvm/llvm-project/commit/10edf88458383280fb2e08cb94489ef8f6ce465f
DIFF: https://github.com/llvm/llvm-project/commit/10edf88458383280fb2e08cb94489ef8f6ce465f.diff

LOG: [CostModel][X86] Update CTPOP costs

With the bdver2 model updates, many of the AVX1 costs were far too high - it also helped expose some costs mismatches for Atom/Silvermont

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/ctpop-latency.ll
    llvm/test/Analysis/CostModel/X86/ctpop.ll
    llvm/test/Transforms/SLPVectorizer/X86/ctpop.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 9b4067d6037bb..fea1c07c0d16d 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3428,14 +3428,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTLZ,       MVT::v8i32,   { 38 } }, // 2 x 128-bit Op + extract/insert
     { ISD::CTLZ,       MVT::v16i16,  { 30 } }, // 2 x 128-bit Op + extract/insert
     { ISD::CTLZ,       MVT::v32i8,   { 20 } }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTPOP,      MVT::v4i64,   { 16, 19, 19, 28 } }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTPOP,      MVT::v2i64,   {  7,  9, 10, 14 } },
-    { ISD::CTPOP,      MVT::v8i32,   { 24, 27, 27, 36 } }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTPOP,      MVT::v4i32,   { 11, 12, 14, 18 } },
-    { ISD::CTPOP,      MVT::v16i16,  { 20, 23, 22, 31 } }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTPOP,      MVT::v8i16,   {  9, 11, 11, 15 } },
-    { ISD::CTPOP,      MVT::v32i8,   { 14, 17, 16, 25 } }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTPOP,      MVT::v16i8,   {  6,  7,  8, 12 } },
+    { ISD::CTPOP,      MVT::v4i64,   { 14, 18, 19, 28 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTPOP,      MVT::v2i64,   {  7, 14, 10, 14 } },
+    { ISD::CTPOP,      MVT::v8i32,   { 18, 24, 27, 36 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTPOP,      MVT::v4i32,   {  9, 20, 14, 18 } },
+    { ISD::CTPOP,      MVT::v16i16,  { 16, 21, 22, 31 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTPOP,      MVT::v8i16,   {  8, 18, 11, 15 } },
+    { ISD::CTPOP,      MVT::v32i8,   { 13, 15, 16, 25 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTPOP,      MVT::v16i8,   {  6, 12,  8, 12 } },
     { ISD::CTTZ,       MVT::v4i64,   { 22 } }, // 2 x 128-bit Op + extract/insert
     { ISD::CTTZ,       MVT::v8i32,   { 30 } }, // 2 x 128-bit Op + extract/insert
     { ISD::CTTZ,       MVT::v16i16,  { 26 } }, // 2 x 128-bit Op + extract/insert
@@ -3519,10 +3519,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     { ISD::CTLZ,       MVT::v4i32,   { 18 } },
     { ISD::CTLZ,       MVT::v8i16,   { 14 } },
     { ISD::CTLZ,       MVT::v16i8,   {  9 } },
-    { ISD::CTPOP,      MVT::v2i64,   {  7, 19, 12, 18 } },
-    { ISD::CTPOP,      MVT::v4i32,   { 11, 24, 16, 22 } },
-    { ISD::CTPOP,      MVT::v8i16,   {  9, 18, 14, 20 } },
-    { ISD::CTPOP,      MVT::v16i8,   {  6, 12, 10, 16 } },
+    { ISD::CTPOP,      MVT::v2i64,   { 13, 19, 12, 18 } },
+    { ISD::CTPOP,      MVT::v4i32,   { 18, 24, 16, 22 } },
+    { ISD::CTPOP,      MVT::v8i16,   { 13, 18, 14, 20 } },
+    { ISD::CTPOP,      MVT::v16i8,   { 11, 12, 10, 16 } },
     { ISD::CTTZ,       MVT::v2i64,   { 10 } },
     { ISD::CTTZ,       MVT::v4i32,   { 14 } },
     { ISD::CTTZ,       MVT::v8i16,   { 12 } },

diff  --git a/llvm/test/Analysis/CostModel/X86/ctpop-latency.ll b/llvm/test/Analysis/CostModel/X86/ctpop-latency.ll
index 8189847865633..27c6e253a4f10 100644
--- a/llvm/test/Analysis/CostModel/X86/ctpop-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/ctpop-latency.ll
@@ -95,7 +95,7 @@ define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v2i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v2i64'
@@ -132,7 +132,7 @@ define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v4i64'
@@ -169,7 +169,7 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i64'
@@ -206,7 +206,7 @@ define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v4i32'
@@ -243,7 +243,7 @@ define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i32'
@@ -280,7 +280,7 @@ define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i32'
@@ -317,7 +317,7 @@ define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i16'
@@ -354,7 +354,7 @@ define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i16'
@@ -391,7 +391,7 @@ define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v32i16'
@@ -428,7 +428,7 @@ define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i8'
@@ -465,7 +465,7 @@ define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v32i8'
@@ -502,7 +502,7 @@ define <64 x i8> @var_ctpop_v64i8(<64 x i8> %a) {
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v64i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/ctpop.ll b/llvm/test/Analysis/CostModel/X86/ctpop.ll
index daf0566789bd6..1fd4a92b83754 100644
--- a/llvm/test/Analysis/CostModel/X86/ctpop.ll
+++ b/llvm/test/Analysis/CostModel/X86/ctpop.ll
@@ -91,7 +91,7 @@ define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v2i64'
@@ -128,11 +128,11 @@ define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v4i64'
@@ -165,11 +165,11 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i64'
@@ -202,11 +202,11 @@ define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v4i32'
@@ -239,11 +239,11 @@ define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i32'
@@ -276,11 +276,11 @@ define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i32'
@@ -313,11 +313,11 @@ define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v8i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v8i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v8i16'
@@ -350,11 +350,11 @@ define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v16i16'
@@ -387,11 +387,11 @@ define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v32i16'
@@ -424,7 +424,7 @@ define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v16i8'
@@ -461,11 +461,11 @@ define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v32i8'
@@ -498,11 +498,11 @@ define <64 x i8> @var_ctpop_v64i8(<64 x i8> %a) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctpop
 ;
 ; SSE42-LABEL: 'var_ctpop_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX1-LABEL: 'var_ctpop_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctpop
 ;
 ; AVX2-LABEL: 'var_ctpop_v64i8'

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/ctpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/ctpop.ll
index d87fa110531b0..ac4f38bde5575 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/ctpop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ctpop.ll
@@ -143,26 +143,11 @@ define void @ctpop_4i32() #0 {
 ; SSE42-NEXT:    store i32 [[CTPOP3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
 ; SSE42-NEXT:    ret void
 ;
-; AVX1-LABEL: @ctpop_4i32(
-; AVX1-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-; AVX1-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-; AVX1-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-; AVX1-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
-; AVX1-NEXT:    [[CTPOP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD0]])
-; AVX1-NEXT:    [[CTPOP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD1]])
-; AVX1-NEXT:    [[CTPOP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD2]])
-; AVX1-NEXT:    [[CTPOP3:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD3]])
-; AVX1-NEXT:    store i32 [[CTPOP0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-; AVX1-NEXT:    store i32 [[CTPOP1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-; AVX1-NEXT:    store i32 [[CTPOP2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-; AVX1-NEXT:    store i32 [[CTPOP3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
-; AVX1-NEXT:    ret void
-;
-; AVX2-LABEL: @ctpop_4i32(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
-; AVX2-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP1]])
-; AVX2-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
-; AVX2-NEXT:    ret void
+; AVX-LABEL: @ctpop_4i32(
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; AVX-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP1]])
+; AVX-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; AVX-NEXT:    ret void
 ;
   %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
   %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
@@ -216,38 +201,11 @@ define void @ctpop_8i32() #0 {
 ; SSE42-NEXT:    store i32 [[CTPOP7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
 ; SSE42-NEXT:    ret void
 ;
-; AVX1-LABEL: @ctpop_8i32(
-; AVX1-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-; AVX1-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-; AVX1-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-; AVX1-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-; AVX1-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-; AVX1-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-; AVX1-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-; AVX1-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
-; AVX1-NEXT:    [[CTPOP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD0]])
-; AVX1-NEXT:    [[CTPOP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD1]])
-; AVX1-NEXT:    [[CTPOP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD2]])
-; AVX1-NEXT:    [[CTPOP3:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD3]])
-; AVX1-NEXT:    [[CTPOP4:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD4]])
-; AVX1-NEXT:    [[CTPOP5:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD5]])
-; AVX1-NEXT:    [[CTPOP6:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD6]])
-; AVX1-NEXT:    [[CTPOP7:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD7]])
-; AVX1-NEXT:    store i32 [[CTPOP0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-; AVX1-NEXT:    store i32 [[CTPOP1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-; AVX1-NEXT:    store i32 [[CTPOP2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-; AVX1-NEXT:    store i32 [[CTPOP3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-; AVX1-NEXT:    store i32 [[CTPOP4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-; AVX1-NEXT:    store i32 [[CTPOP5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-; AVX1-NEXT:    store i32 [[CTPOP6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-; AVX1-NEXT:    store i32 [[CTPOP7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
-; AVX1-NEXT:    ret void
-;
-; AVX2-LABEL: @ctpop_8i32(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
-; AVX2-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> [[TMP1]])
-; AVX2-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
-; AVX2-NEXT:    ret void
+; AVX-LABEL: @ctpop_8i32(
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; AVX-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> [[TMP1]])
+; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; AVX-NEXT:    ret void
 ;
   %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
   %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2


        


More information about the llvm-commits mailing list