[llvm] r233689 - [SystemZ] Use POPCNT instruction on z196
Ulrich Weigand
ulrich.weigand at de.ibm.com
Tue Mar 31 05:56:34 PDT 2015
Author: uweigand
Date: Tue Mar 31 07:56:33 2015
New Revision: 233689
URL: http://llvm.org/viewvc/llvm-project?rev=233689&view=rev
Log:
[SystemZ] Use POPCNT instruction on z196
We already exploit a number of instructions specific to z196,
but not yet POPCNT. Add support for the population-count
facility, MC support for the POPCNT instruction, CodeGen
support for using POPCNT, and implement the getPopcntSupport
TargetTransformInfo hook.
Added:
llvm/trunk/test/CodeGen/SystemZ/ctpop-01.ll
Modified:
llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td
llvm/trunk/lib/Target/SystemZ/SystemZOperators.td
llvm/trunk/lib/Target/SystemZ/SystemZProcessors.td
llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.cpp
llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.h
llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h
llvm/trunk/test/MC/Disassembler/SystemZ/insns.txt
llvm/trunk/test/MC/SystemZ/insn-bad.s
llvm/trunk/test/MC/SystemZ/insn-good-z196.s
Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp?rev=233689&r1=233688&r2=233689&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp Tue Mar 31 07:56:33 2015
@@ -163,8 +163,13 @@ SystemZTargetLowering::SystemZTargetLowe
// available, or if the operand is constant.
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ // Use POPCNT on z196 and above.
+ if (Subtarget.hasPopulationCount())
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ else
+ setOperationAction(ISD::CTPOP, VT, Expand);
+
// No special instructions for these.
- setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
@@ -2304,6 +2309,45 @@ SDValue SystemZTargetLowering::lowerOR(S
MVT::i64, HighOp, Low32);
}
+SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int64_t OrigBitSize = VT.getSizeInBits();
+ SDLoc DL(Op);
+
+ // Get the known-zero mask for the operand.
+ Op = Op.getOperand(0);
+ APInt KnownZero, KnownOne;
+ DAG.computeKnownBits(Op, KnownZero, KnownOne);
+ uint64_t Mask = ~KnownZero.getZExtValue();
+
+ // Skip known-zero high parts of the operand.
+ int64_t BitSize = OrigBitSize;
+ while ((Mask & ((((uint64_t)1 << (BitSize / 2)) - 1) << (BitSize / 2))) == 0)
+ BitSize = BitSize / 2;
+
+ // The POPCNT instruction counts the number of bits in each byte.
+ Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
+ Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
+ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+
+ // Add up per-byte counts in a binary tree. All bits of Op at
+ // position larger than BitSize remain zero throughout.
+ for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
+ SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, VT));
+ if (BitSize != OrigBitSize)
+ Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
+ DAG.getConstant(((uint64_t)1 << BitSize) - 1, VT));
+ Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
+ }
+
+ // Extract overall result from high byte.
+ if (BitSize > 8)
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(BitSize - 8, VT));
+
+ return Op;
+}
+
// Op is an atomic load. Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
SelectionDAG &DAG) const {
@@ -2554,6 +2598,8 @@ SDValue SystemZTargetLowering::LowerOper
return lowerUDIVREM(Op, DAG);
case ISD::OR:
return lowerOR(Op, DAG);
+ case ISD::CTPOP:
+ return lowerCTPOP(Op, DAG);
case ISD::ATOMIC_SWAP:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:
Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h?rev=233689&r1=233688&r2=233689&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h Tue Mar 31 07:56:33 2015
@@ -87,6 +87,9 @@ enum {
// the number of the register.
EXTRACT_ACCESS,
+ // Count number of bits set in operand 0 per byte.
+ POPCNT,
+
// Wrappers around the ISD opcodes of the same name. The output and
// first input operands are GR128s. The trailing numbers are the
// widths of the second operand in bits.
@@ -304,6 +307,7 @@ private:
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td?rev=233689&r1=233688&r2=233689&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td Tue Mar 31 07:56:33 2015
@@ -1382,6 +1382,13 @@ let Defs = [CC] in {
def : Pat<(ctlz GR64:$src),
(EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
+// Population count. Counts bits set per byte.
+let Predicates = [FeaturePopulationCount], Defs = [CC] in {
+ def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2),
+ "popcnt\t$R1, $R2",
+ [(set GR64:$R1, (z_popcnt GR64:$R2))]>;
+}
+
// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
def : Pat<(i64 (anyext GR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
Modified: llvm/trunk/lib/Target/SystemZ/SystemZOperators.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZOperators.td?rev=233689&r1=233688&r2=233689&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZOperators.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZOperators.td Tue Mar 31 07:56:33 2015
@@ -121,6 +121,7 @@ def z_select_ccmask : SDNode<"System
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS",
SDT_ZExtractAccess>;
+def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
Modified: llvm/trunk/lib/Target/SystemZ/SystemZProcessors.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZProcessors.td?rev=233689&r1=233688&r2=233689&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZProcessors.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZProcessors.td Tue Mar 31 07:56:33 2015
@@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature<
"Assume that the floating-point extension facility is installed"
>;
+def FeaturePopulationCount : SystemZFeature<
+ "population-count", "PopulationCount",
+ "Assume that the population-count facility is installed"
+>;
+
def FeatureFastSerialization : SystemZFeature<
"fast-serialization", "FastSerialization",
"Assume that the fast-serialization facility is installed"
@@ -54,9 +59,9 @@ def : Processor<"generic", NoItineraries
def : Processor<"z10", NoItineraries, []>;
def : Processor<"z196", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeatureFastSerialization,
- FeatureInterlockedAccess1]>;
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1]>;
def : Processor<"zEC12", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeatureFastSerialization,
- FeatureInterlockedAccess1]>;
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1]>;
Modified: llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.cpp?rev=233689&r1=233688&r2=233689&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.cpp Tue Mar 31 07:56:33 2015
@@ -38,7 +38,8 @@ SystemZSubtarget::SystemZSubtarget(const
const TargetMachine &TM)
: SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
- HasFastSerialization(false), HasInterlockedAccess1(false),
+ HasPopulationCount(false), HasFastSerialization(false),
+ HasInterlockedAccess1(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {}
Modified: llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.h?rev=233689&r1=233688&r2=233689&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.h Tue Mar 31 07:56:33 2015
@@ -38,6 +38,7 @@ protected:
bool HasLoadStoreOnCond;
bool HasHighWord;
bool HasFPExtension;
+ bool HasPopulationCount;
bool HasFastSerialization;
bool HasInterlockedAccess1;
@@ -86,6 +87,9 @@ public:
// Return true if the target has the floating-point extension facility.
bool hasFPExtension() const { return HasFPExtension; }
+ // Return true if the target has the population-count facility.
+ bool hasPopulationCount() const { return HasPopulationCount; }
+
// Return true if the target has the fast-serialization facility.
bool hasFastSerialization() const { return HasFastSerialization; }
Modified: llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp?rev=233689&r1=233688&r2=233689&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp Tue Mar 31 07:56:33 2015
@@ -229,3 +229,12 @@ unsigned SystemZTTIImpl::getIntImmCost(I
}
return SystemZTTIImpl::getIntImmCost(Imm, Ty);
}
+
+TargetTransformInfo::PopcntSupportKind
+SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
+ assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2");
+ if (ST->hasPopulationCount() && TyWidth <= 64)
+ return TTI::PSK_FastHardware;
+ return TTI::PSK_Software;
+}
+
Modified: llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h?rev=233689&r1=233688&r2=233689&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h Tue Mar 31 07:56:33 2015
@@ -60,6 +60,8 @@ public:
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
/// @}
};
Added: llvm/trunk/test/CodeGen/SystemZ/ctpop-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/ctpop-01.ll?rev=233689&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/ctpop-01.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/ctpop-01.ll Tue Mar 31 07:56:33 2015
@@ -0,0 +1,96 @@
+; Test population-count instruction
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i32 @llvm.ctpop.i32(i32 %a)
+declare i64 @llvm.ctpop.i64(i64 %a)
+
+define i32 @f1(i32 %a) {
+; CHECK-LABEL: f1:
+; CHECK: popcnt %r0, %r2
+; CHECK: sllk %r1, %r0, 16
+; CHECK: ar %r1, %r0
+; CHECK: sllk %r2, %r1, 8
+; CHECK: ar %r2, %r1
+; CHECK: srl %r2, 24
+; CHECK: br %r14
+
+ %popcnt = call i32 @llvm.ctpop.i32(i32 %a)
+ ret i32 %popcnt
+}
+
+define i32 @f2(i32 %a) {
+; CHECK-LABEL: f2:
+; CHECK: llhr %r0, %r2
+; CHECK: popcnt %r0, %r0
+; CHECK: risblg %r2, %r0, 16, 151, 8
+; CHECK: ar %r2, %r0
+; CHECK: srl %r2, 8
+; CHECK: br %r14
+ %and = and i32 %a, 65535
+ %popcnt = call i32 @llvm.ctpop.i32(i32 %and)
+ ret i32 %popcnt
+}
+
+define i32 @f3(i32 %a) {
+; CHECK-LABEL: f3:
+; CHECK: llcr %r0, %r2
+; CHECK: popcnt %r2, %r0
+; CHECK: br %r14
+ %and = and i32 %a, 255
+ %popcnt = call i32 @llvm.ctpop.i32(i32 %and)
+ ret i32 %popcnt
+}
+
+define i64 @f4(i64 %a) {
+; CHECK-LABEL: f4:
+; CHECK: popcnt %r0, %r2
+; CHECK: sllg %r1, %r0, 32
+; CHECK: agr %r1, %r0
+; CHECK: sllg %r0, %r1, 16
+; CHECK: agr %r0, %r1
+; CHECK: sllg %r1, %r0, 8
+; CHECK: agr %r1, %r0
+; CHECK: srlg %r2, %r1, 56
+; CHECK: br %r14
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %a)
+ ret i64 %popcnt
+}
+
+define i64 @f5(i64 %a) {
+; CHECK-LABEL: f5:
+; CHECK: llgfr %r0, %r2
+; CHECK: popcnt %r0, %r0
+; CHECK: sllg %r1, %r0, 16
+; CHECK: algfr %r0, %r1
+; CHECK: sllg %r1, %r0, 8
+; CHECK: algfr %r0, %r1
+; CHECK: srlg %r2, %r0, 24
+ %and = and i64 %a, 4294967295
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
+ ret i64 %popcnt
+}
+
+define i64 @f6(i64 %a) {
+; CHECK-LABEL: f6:
+; CHECK: llghr %r0, %r2
+; CHECK: popcnt %r0, %r0
+; CHECK: risbg %r1, %r0, 48, 183, 8
+; CHECK: agr %r1, %r0
+; CHECK: srlg %r2, %r1, 8
+; CHECK: br %r14
+ %and = and i64 %a, 65535
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
+ ret i64 %popcnt
+}
+
+define i64 @f7(i64 %a) {
+; CHECK-LABEL: f7:
+; CHECK: llgcr %r0, %r2
+; CHECK: popcnt %r2, %r0
+; CHECK: br %r14
+ %and = and i64 %a, 255
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
+ ret i64 %popcnt
+}
+
Modified: llvm/trunk/test/MC/Disassembler/SystemZ/insns.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/SystemZ/insns.txt?rev=233689&r1=233688&r2=233689&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/SystemZ/insns.txt (original)
+++ llvm/trunk/test/MC/Disassembler/SystemZ/insns.txt Tue Mar 31 07:56:33 2015
@@ -6334,6 +6334,18 @@
# CHECK: pfd 15, 0
0xe3 0xf0 0x00 0x00 0x00 0x36
+# CHECK: popcnt %r0, %r0
+0xb9 0xe1 0x00 0x00
+
+# CHECK: popcnt %r0, %r15
+0xb9 0xe1 0x00 0x0f
+
+# CHECK: popcnt %r15, %r0
+0xb9 0xe1 0x00 0xf0
+
+# CHECK: popcnt %r7, %r8
+0xb9 0xe1 0x00 0x78
+
# CHECK: risbg %r0, %r0, 0, 0, 0
0xec 0x00 0x00 0x00 0x00 0x55
Modified: llvm/trunk/test/MC/SystemZ/insn-bad.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/SystemZ/insn-bad.s?rev=233689&r1=233688&r2=233689&view=diff
==============================================================================
--- llvm/trunk/test/MC/SystemZ/insn-bad.s (original)
+++ llvm/trunk/test/MC/SystemZ/insn-bad.s Tue Mar 31 07:56:33 2015
@@ -2666,6 +2666,11 @@
pfdrl 1, 1
pfdrl 1, 0x100000000
+#CHECK: error: {{(instruction requires: population-count)?}}
+#CHECK: popcnt %r0, %r0
+
+ popcnt %r0, %r0
+
#CHECK: error: invalid operand
#CHECK: risbg %r0,%r0,0,0,-1
#CHECK: error: invalid operand
Modified: llvm/trunk/test/MC/SystemZ/insn-good-z196.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/SystemZ/insn-good-z196.s?rev=233689&r1=233688&r2=233689&view=diff
==============================================================================
--- llvm/trunk/test/MC/SystemZ/insn-good-z196.s (original)
+++ llvm/trunk/test/MC/SystemZ/insn-good-z196.s Tue Mar 31 07:56:33 2015
@@ -1021,6 +1021,16 @@
ork %r15,%r0,%r0
ork %r7,%r8,%r9
+#CHECK: popcnt %r0, %r0 # encoding: [0xb9,0xe1,0x00,0x00]
+#CHECK: popcnt %r0, %r15 # encoding: [0xb9,0xe1,0x00,0x0f]
+#CHECK: popcnt %r15, %r0 # encoding: [0xb9,0xe1,0x00,0xf0]
+#CHECK: popcnt %r7, %r8 # encoding: [0xb9,0xe1,0x00,0x78]
+
+ popcnt %r0,%r0
+ popcnt %r0,%r15
+ popcnt %r15,%r0
+ popcnt %r7,%r8
+
#CHECK: risbhg %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x5d]
#CHECK: risbhg %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x5d]
#CHECK: risbhg %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x5d]
More information about the llvm-commits
mailing list