[llvm] r374775 - [CostModel][X86] Add CTPOP scalar costs (PR43656)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 14 07:07:44 PDT 2019


Author: rksimon
Date: Mon Oct 14 07:07:43 2019
New Revision: 374775

URL: http://llvm.org/viewvc/llvm-project?rev=374775&view=rev
Log:
[CostModel][X86] Add CTPOP scalar costs (PR43656)

Add specific scalar costs for ctpop instructions, these are based on the llvm-mca's SLM throughput numbers (the oldest model we have).

For targets supporting POPCNT, we provide overrides that assume 1cy costs.

Modified:
    llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/trunk/test/Analysis/CostModel/X86/ctpop.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/ctpop.ll

Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=374775&r1=374774&r2=374775&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Mon Oct 14 07:07:43 2019
@@ -2103,8 +2103,17 @@ int X86TTIImpl::getIntrinsicInstrCost(In
     { ISD::FSQRT,      MVT::f32,    28 }, // Pentium III from http://www.agner.org/
     { ISD::FSQRT,      MVT::v4f32,  56 }, // Pentium III from http://www.agner.org/
   };
+  static const CostTblEntry POPCNT64CostTbl[] = { // 64-bit targets
+    { ISD::CTPOP,      MVT::i64,     1 },
+  };
+  static const CostTblEntry POPCNT32CostTbl[] = { // 32 or 64-bit targets
+    { ISD::CTPOP,      MVT::i32,     1 },
+    { ISD::CTPOP,      MVT::i16,     1 },
+    { ISD::CTPOP,      MVT::i8,      1 },
+  };
   static const CostTblEntry X64CostTbl[] = { // 64-bit targets
     { ISD::BITREVERSE, MVT::i64,    14 },
+    { ISD::CTPOP,      MVT::i64,    10 },
     { ISD::SADDO,      MVT::i64,     1 },
     { ISD::UADDO,      MVT::i64,     1 },
   };
@@ -2112,6 +2121,9 @@ int X86TTIImpl::getIntrinsicInstrCost(In
     { ISD::BITREVERSE, MVT::i32,    14 },
     { ISD::BITREVERSE, MVT::i16,    14 },
     { ISD::BITREVERSE, MVT::i8,     11 },
+    { ISD::CTPOP,      MVT::i32,     8 },
+    { ISD::CTPOP,      MVT::i16,     9 },
+    { ISD::CTPOP,      MVT::i8,      7 },
     { ISD::SADDO,      MVT::i32,     1 },
     { ISD::SADDO,      MVT::i16,     1 },
     { ISD::SADDO,      MVT::i8,      1 },
@@ -2223,6 +2235,17 @@ int X86TTIImpl::getIntrinsicInstrCost(In
       if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
         return LT.first * Entry->Cost;
 
+    if (ST->hasPOPCNT()) {
+      if (ST->is64Bit())
+        if (const auto *Entry = CostTableLookup(POPCNT64CostTbl, ISD, MTy))
+          return LT.first * Entry->Cost;
+
+      if (const auto *Entry = CostTableLookup(POPCNT32CostTbl, ISD, MTy))
+        return LT.first * Entry->Cost;
+    }
+
+    // TODO - add LZCNT and BMI (TZCNT) scalar handling
+
     if (ST->is64Bit())
       if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
         return LT.first * Entry->Cost;

Modified: llvm/trunk/test/Analysis/CostModel/X86/ctpop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/ctpop.ll?rev=374775&r1=374774&r2=374775&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/ctpop.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/ctpop.ll Mon Oct 14 07:07:43 2019
@@ -16,7 +16,7 @@ declare  i8 @llvm.ctpop.i8(i8)
 
 define i64 @var_ctpop_i64(i64 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i64'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i64'
@@ -29,7 +29,7 @@ define i64 @var_ctpop_i64(i64 %a) {
 
 define i32 @var_ctpop_i32(i32 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i32'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i32'
@@ -42,7 +42,7 @@ define i32 @var_ctpop_i32(i32 %a) {
 
 define i16 @var_ctpop_i16(i16 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i16'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i16'
@@ -55,7 +55,7 @@ define i16 @var_ctpop_i16(i16 %a) {
 
 define i8 @var_ctpop_i8(i8 %a) {
 ; NOPOPCNT-LABEL: 'var_ctpop_i8'
-; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
+; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
 ; NOPOPCNT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %ctpop
 ;
 ; POPCNT-LABEL: 'var_ctpop_i8'

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/ctpop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/ctpop.ll?rev=374775&r1=374774&r2=374775&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/ctpop.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/ctpop.ll Mon Oct 14 07:07:43 2019
@@ -21,14 +21,29 @@ declare i16 @llvm.ctpop.i16(i16)
 declare  i8 @llvm.ctpop.i8(i8)
 
 define void @ctpop_2i64() #0 {
-; CHECK-LABEL: @ctpop_2i64(
-; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[CTPOP0:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD0]])
-; CHECK-NEXT:    [[CTPOP1:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD1]])
-; CHECK-NEXT:    store i64 [[CTPOP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; CHECK-NEXT:    store i64 [[CTPOP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
-; CHECK-NEXT:    ret void
+; SSE2-LABEL: @ctpop_2i64(
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; SSE2-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[TMP1]])
+; SSE2-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; SSE2-NEXT:    ret void
+;
+; SSE42-LABEL: @ctpop_2i64(
+; SSE42-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; SSE42-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; SSE42-NEXT:    [[CTPOP0:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD0]])
+; SSE42-NEXT:    [[CTPOP1:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD1]])
+; SSE42-NEXT:    store i64 [[CTPOP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; SSE42-NEXT:    store i64 [[CTPOP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; SSE42-NEXT:    ret void
+;
+; AVX-LABEL: @ctpop_2i64(
+; AVX-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; AVX-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX-NEXT:    [[CTPOP0:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD0]])
+; AVX-NEXT:    [[CTPOP1:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD1]])
+; AVX-NEXT:    store i64 [[CTPOP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; AVX-NEXT:    store i64 [[CTPOP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX-NEXT:    ret void
 ;
   %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
   %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
@@ -40,20 +55,29 @@ define void @ctpop_2i64() #0 {
 }
 
 define void @ctpop_4i64() #0 {
-; SSE-LABEL: @ctpop_4i64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; SSE-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; SSE-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
-; SSE-NEXT:    [[CTPOP0:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD0]])
-; SSE-NEXT:    [[CTPOP1:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD1]])
-; SSE-NEXT:    [[CTPOP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD2]])
-; SSE-NEXT:    [[CTPOP3:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD3]])
-; SSE-NEXT:    store i64 [[CTPOP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; SSE-NEXT:    store i64 [[CTPOP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; SSE-NEXT:    store i64 [[CTPOP2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; SSE-NEXT:    store i64 [[CTPOP3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
-; SSE-NEXT:    ret void
+; SSE2-LABEL: @ctpop_4i64(
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 4
+; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2) to <2 x i64>*), align 4
+; SSE2-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[TMP1]])
+; SSE2-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[TMP2]])
+; SSE2-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 4
+; SSE2-NEXT:    store <2 x i64> [[TMP4]], <2 x i64>* bitcast (i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2) to <2 x i64>*), align 4
+; SSE2-NEXT:    ret void
+;
+; SSE42-LABEL: @ctpop_4i64(
+; SSE42-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+; SSE42-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+; SSE42-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+; SSE42-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; SSE42-NEXT:    [[CTPOP0:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD0]])
+; SSE42-NEXT:    [[CTPOP1:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD1]])
+; SSE42-NEXT:    [[CTPOP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD2]])
+; SSE42-NEXT:    [[CTPOP3:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD3]])
+; SSE42-NEXT:    store i64 [[CTPOP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+; SSE42-NEXT:    store i64 [[CTPOP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+; SSE42-NEXT:    store i64 [[CTPOP2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+; SSE42-NEXT:    store i64 [[CTPOP3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; SSE42-NEXT:    ret void
 ;
 ; AVX1-LABEL: @ctpop_4i64(
 ; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4




More information about the llvm-commits mailing list