[llvm] r264600 - [PowerPC] On the A2, popcnt[dw] are very slow

Hal Finkel via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 28 10:52:08 PDT 2016


Author: hfinkel
Date: Mon Mar 28 12:52:08 2016
New Revision: 264600

URL: http://llvm.org/viewvc/llvm-project?rev=264600&view=rev
Log:
[PowerPC] On the A2, popcnt[dw] are very slow

The A2 cores support the popcntw/popcntd instructions, but they're microcoded,
and slower than our default software emulation. Specifically, popcnt[dw] take
approximately 74 cycles, whereas our software emulation takes only 24-28
cycles.

I've added a new target feature to indicate a slow popcnt[dw], instead of just
removing the existing target feature from the a2/a2q processor models, because:
  1. It allows us to return more accurate information via the TTI interface
     (I recognize that this currently makes no practical difference).
  2. It is hopefully easier to understand (it allows the core's features to
     match its manual while still having the desired effect).

Modified:
    llvm/trunk/lib/Target/PowerPC/PPC.td
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCSubtarget.cpp
    llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h
    llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
    llvm/trunk/test/CodeGen/PowerPC/popcnt.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPC.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPC.td?rev=264600&r1=264599&r2=264600&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPC.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPC.td Mon Mar 28 12:52:08 2016
@@ -155,6 +155,12 @@ def FeatureFloat128 :
 def DeprecatedDST    : SubtargetFeature<"", "DeprecatedDST", "true",
   "Treat vector data stream cache control instructions as deprecated">;
 
+// Note that for the a2/a2q processor models we should not use popcnt[dw] by
+// default. These processors do support the instructions, but they're
+// microcoded, and the software emulation is roughly three times as fast.
+def SlowPOPCNTD      : SubtargetFeature<"slow-popcntd","SlowPOPCNTD", "true",
+                                        "The popcnt[dw] instructions are slow">;
+
 /*  Since new processors generally contain a superset of features of those that
     came before them, the idea is to make implementations of new processors
     less error prone and easier to read.
@@ -337,16 +343,17 @@ def : ProcessorModel<"a2", PPCA2Model,
                    FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
                    FeatureSTFIWX, FeatureLFIWAX,
                    FeatureFPRND, FeatureFPCVT, FeatureISEL,
-                   FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
-               /*, Feature64BitRegs */, FeatureMFTB]>;
+                   FeaturePOPCNTD, SlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
+                   Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>;
 def : ProcessorModel<"a2q", PPCA2Model,
                   [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
                    FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
                    FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
                    FeatureSTFIWX, FeatureLFIWAX,
                    FeatureFPRND, FeatureFPCVT, FeatureISEL,
-                   FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
-               /*, Feature64BitRegs */, FeatureQPX, FeatureMFTB]>;
+                   FeaturePOPCNTD, SlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
+                   Feature64Bit /*, Feature64BitRegs */, FeatureQPX,
+                   FeatureMFTB]>;
 def : ProcessorModel<"pwr3", G5Model,
                   [DirectivePwr3, FeatureAltivec,
                    FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=264600&r1=264599&r2=264600&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Mon Mar 28 12:52:08 2016
@@ -214,7 +214,7 @@ PPCTargetLowering::PPCTargetLowering(con
   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
 
-  if (Subtarget.hasPOPCNTD()) {
+  if (Subtarget.hasPOPCNTD() && !Subtarget.isPOPCNTDSlow()) {
     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
   } else {

Modified: llvm/trunk/lib/Target/PowerPC/PPCSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCSubtarget.cpp?rev=264600&r1=264599&r2=264600&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCSubtarget.cpp Mon Mar 28 12:52:08 2016
@@ -105,6 +105,7 @@ void PPCSubtarget::initializeEnvironment
   HasHTM = false;
   HasFusion = false;
   HasFloat128 = false;
+  SlowPOPCNTD = false;
 }
 
 void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {

Modified: llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h?rev=264600&r1=264599&r2=264600&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h Mon Mar 28 12:52:08 2016
@@ -124,6 +124,7 @@ protected:
   bool HasHTM;
   bool HasFusion;
   bool HasFloat128;
+  bool SlowPOPCNTD;
 
   /// When targeting QPX running a stock PPC64 Linux kernel where the stack
   /// alignment has not been changed, we need to keep the 16-byte alignment
@@ -248,6 +249,7 @@ public:
   bool isE500() const { return IsE500; }
   bool isFeatureMFTB() const { return FeatureMFTB; }
   bool isDeprecatedDST() const { return DeprecatedDST; }
+  bool isPOPCNTDSlow() const { return SlowPOPCNTD; }
   bool hasICBT() const { return HasICBT; }
   bool hasInvariantFunctionDescriptors() const {
     return HasInvariantFunctionDescriptors;

Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp?rev=264600&r1=264599&r2=264600&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp Mon Mar 28 12:52:08 2016
@@ -43,7 +43,7 @@ TargetTransformInfo::PopcntSupportKind
 PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
   if (ST->hasPOPCNTD() && TyWidth <= 64)
-    return TTI::PSK_FastHardware;
+    return ST->isPOPCNTDSlow() ? TTI::PSK_SlowHardware : TTI::PSK_FastHardware;
   return TTI::PSK_Software;
 }
 

Modified: llvm/trunk/test/CodeGen/PowerPC/popcnt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/popcnt.ll?rev=264600&r1=264599&r2=264600&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/popcnt.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/popcnt.ll Mon Mar 28 12:52:08 2016
@@ -1,37 +1,51 @@
 ; RUN: llc -march=ppc64 -mattr=+popcntd < %s | FileCheck %s
+; RUN: llc -march=ppc64 -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -march=ppc64 -mcpu=a2q < %s | FileCheck %s --check-prefix=SLOWPC
 
 define i8 @cnt8(i8 %x) nounwind readnone {
   %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
   ret i8 %cnt
-; CHECK: @cnt8
+; CHECK-LABEL: @cnt8
 ; CHECK: rlwinm
 ; CHECK: popcntw
 ; CHECK: blr
+
+; SLOWPC-LABEL: @cnt8
+; SLOWPC-NOT: popcnt
 }
 
 define i16 @cnt16(i16 %x) nounwind readnone {
   %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
   ret i16 %cnt
-; CHECK: @cnt16
+; CHECK-LABEL: @cnt16
 ; CHECK: rlwinm
 ; CHECK: popcntw
 ; CHECK: blr
+
+; SLOWPC-LABEL: @cnt16
+; SLOWPC-NOT: popcnt
 }
 
 define i32 @cnt32(i32 %x) nounwind readnone {
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
-; CHECK: @cnt32
+; CHECK-LABEL: @cnt32
 ; CHECK: popcntw
 ; CHECK: blr
+
+; SLOWPC-LABEL: @cnt32
+; SLOWPC-NOT: popcnt
 }
 
 define i64 @cnt64(i64 %x) nounwind readnone {
   %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
   ret i64 %cnt
-; CHECK: @cnt64
+; CHECK-LABEL: @cnt64
 ; CHECK: popcntd
 ; CHECK: blr
+
+; SLOWPC-LABEL: @cnt64
+; SLOWPC-NOT: popcnt
 }
 
 declare i8 @llvm.ctpop.i8(i8) nounwind readnone




More information about the llvm-commits mailing list