[llvm] [DAG] Add generic i8 CTPOP lowering using i32 MUL (PR #79989)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 31 22:10:42 PST 2024
================
@@ -8639,6 +8639,24 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
return SDValue();
+ // i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply.
+ if (VT == MVT::i8 && shouldAllowMultiplyInBitCounts(MVT::i8, MVT::i32) &&
+ isOperationLegal(ISD::AND, MVT::i32) &&
+ isOperationLegal(ISD::SRL, MVT::i32) &&
+ isOperationLegal(ISD::MUL, MVT::i32)) {
+ SDValue Mask11 = DAG.getConstant(0x11111111U, dl, MVT::i32);
+ Op = DAG.getZExtOrTrunc(Op, dl, MVT::i32);
+ Op = DAG.getNode(ISD::MUL, dl, MVT::i32, Op,
+ DAG.getConstant(0x08040201U, dl, MVT::i32));
+ Op = DAG.getNode(ISD::SRL, dl, MVT::i32, Op,
+ DAG.getShiftAmountConstant(3, MVT::i32, dl));
+ Op = DAG.getNode(ISD::AND, dl, MVT::i32, Op, Mask11);
+ Op = DAG.getNode(ISD::MUL, dl, MVT::i32, Op, Mask11);
+ Op = DAG.getNode(ISD::SRL, dl, MVT::i32, Op,
+ DAG.getShiftAmountConstant(28, MVT::i32, dl));
+ return DAG.getZExtOrTrunc(Op, dl, MVT::i8);
+ }
+
// This is the "best" algorithm from
----------------
phoebewang wrote:
So the algorithm described by this comment is no longer the "best" one now :)
https://github.com/llvm/llvm-project/pull/79989
More information about the llvm-commits mailing list