[llvm] [VP][RISCV] Introduce experimental.vp.popcount and RISC-V support. (PR #74294)
Yeting Kuo via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 6 00:39:22 PST 2023
https://github.com/yetingk updated https://github.com/llvm/llvm-project/pull/74294
>From b7873531ddd1505051cab50650873beee4225881 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <46629943+yetingk at users.noreply.github.com>
Date: Thu, 30 Nov 2023 13:29:44 +0800
Subject: [PATCH 1/5] [VP][RISCV] Introduce experimental.vp.popcount and RISC-V
support.
This is similar to vp.ctpop, but it counts the set bits across the whole source
mask, whereas vp.ctpop counts the set bits within each lane of its source.
---
llvm/docs/LangRef.rst | 34 +++
llvm/include/llvm/IR/Intrinsics.td | 4 +
llvm/include/llvm/IR/VPIntrinsics.def | 6 +
.../SelectionDAG/LegalizeIntegerTypes.cpp | 10 +
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 3 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 38 +++
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 32 ++
llvm/lib/Target/RISCV/RISCVISelLowering.h | 1 +
.../RISCV/rvv/fixed-vectors-vp-popcount.ll | 282 ++++++++++++++++++
llvm/test/CodeGen/RISCV/rvv/vp-popcount.ll | 260 ++++++++++++++++
llvm/unittests/IR/VPIntrinsicTest.cpp | 2 +
11 files changed, 672 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-popcount.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vp-popcount.ll
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index af064d7ac2195..e346f19c37010 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -21761,6 +21761,40 @@ This intrinsic reverses the order of the first ``evl`` elements in a vector.
The lanes in the result vector disabled by ``mask`` are ``poison``. The
elements past ``evl`` are poison.
+
+.. _int_experimental_vp_popcount:
+
+
+'``llvm.experimental.vp.popcount``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare i32 @llvm.experimental.vp.popcount.v2i1(<2 x i1> %vec, <2 x i1> %mask, i32 %evl)
+ declare i32 @llvm.experimental.vp.popcount.nxv2i1(<vscale x 2 x i1> %vec, <vscale x 2 x i1> %mask, i32 %evl)
+
+Overview:
+"""""""""
+
+Predicated population count of a vector mask.
+
+Arguments:
+""""""""""
+
+The first and second arguments are vector masks and have the same number of elements.
+The third argument is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+This intrinsic counts the set elements among the first ``evl`` elements of the
+first argument and returns the count as a scalar ``i32``. Elements disabled by
+``mask`` and elements past ``evl`` do not contribute to the count.
+
.. _int_vp_load:
'``llvm.vp.load``' Intrinsic
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 060e964f77bf7..c8e83ede1f53b 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2162,6 +2162,10 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
+ def int_experimental_vp_popcount: DefaultAttrsIntrinsic<[llvm_i32_ty],
+ [ llvm_anyvector_ty,
+ LLVMMatchType<0>,
+ llvm_i32_ty]>;
}
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn, ImmArg<ArgIndex<1>>] in {
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 671dc39db26df..cb776419c9adf 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -710,6 +710,12 @@ END_REGISTER_VP(experimental_vp_reverse, EXPERIMENTAL_VP_REVERSE)
///// } Shuffles
+// llvm.experimental.vp.popcount(x,mask,vlen)
+BEGIN_REGISTER_VP(experimental_vp_popcount, 1, 2,
+ EXPERIMENTAL_VP_POPCOUNT, 0)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(experimental_vp_popcount)
+END_REGISTER_VP(experimental_vp_popcount, EXPERIMENTAL_VP_POPCOUNT)
+
#undef BEGIN_REGISTER_VP
#undef BEGIN_REGISTER_VP_INTRINSIC
#undef BEGIN_REGISTER_VP_SDNODE
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 54698edce7d6f..8c478b05eef79 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -305,6 +305,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::LLRINT:
Res = PromoteIntRes_XRINT(N);
break;
+
+ case ISD::EXPERIMENTAL_VP_POPCOUNT:
+ Res = PromoteIntRes_VP_POPCOUNT(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -5882,6 +5886,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_VP_POPCOUNT(SDNode *N) {
+ SDLoc dl(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->ops());
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) {
SDLoc dl(N);
// The result type is equal to the first input operand's type, so the
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index e9bd54089d062..74a44e6177383 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -365,6 +365,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteIntRes_FunnelShift(SDNode *N);
SDValue PromoteIntRes_VPFunnelShift(SDNode *N);
SDValue PromoteIntRes_IS_FPCLASS(SDNode *N);
+ SDValue PromoteIntRes_VP_POPCOUNT(SDNode *N);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -916,6 +917,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SplitVecOp_FP_ROUND(SDNode *N);
SDValue SplitVecOp_FPOpDifferentTypes(SDNode *N);
SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N);
+ SDValue SplitVecOp_VP_POPCOUNT(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
@@ -1023,6 +1025,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
SDValue WidenVecOp_VP_REDUCE(SDNode *N);
SDValue WidenVecOp_ExpOp(SDNode *N);
+ SDValue WidenVecOp_VP_POPCOUNT(SDNode *N);
/// Helper function to generate a set of operations to perform
/// a vector operation for a wider type.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 66461b26468f7..840338e16f683 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3085,6 +3085,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VP_REDUCE_FMIN:
Res = SplitVecOp_VP_REDUCE(N, OpNo);
break;
+ case ISD::EXPERIMENTAL_VP_POPCOUNT:
+ Res = SplitVecOp_VP_POPCOUNT(N, OpNo);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -4031,6 +4034,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
+SDValue DAGTypeLegalizer::SplitVecOp_VP_POPCOUNT(SDNode *N, unsigned OpNo) {
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ assert(OpNo == 0 && "Can only split first operand");
+
+ unsigned Opc = N->getOpcode();
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+
+ SDValue VecOp = N->getOperand(OpNo);
+ EVT VecVT = VecOp.getValueType();
+ assert(VecVT.isVector() && "Can only split reduce vector operand");
+ GetSplitVector(VecOp, Lo, Hi);
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) = DAG.SplitEVL(N->getOperand(2), VecVT, dl);
+
+ SDValue ResLo = DAG.getNode(Opc, dl, ResVT, {Lo, MaskLo, EVLLo});
+ SDValue ResHi = DAG.getNode(Opc, dl, ResVT, {Hi, MaskHi, EVLHi});
+ return DAG.getNode(ISD::ADD, dl, ResVT, ResLo, ResHi);
+}
+
//===----------------------------------------------------------------------===//
// Result Vector Widening
//===----------------------------------------------------------------------===//
@@ -6120,6 +6148,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VP_REDUCE_FMIN:
Res = WidenVecOp_VP_REDUCE(N);
break;
+ case ISD::EXPERIMENTAL_VP_POPCOUNT:
+ Res = WidenVecOp_VP_POPCOUNT(N);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -6883,6 +6914,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
DAG.getVectorIdxConstant(0, DL));
}
+SDValue DAGTypeLegalizer::WidenVecOp_VP_POPCOUNT(SDNode *N) {
+ EVT ResVT = N->getValueType(0);
+ SDValue Op = GetWidenedVector(N->getOperand(0));
+ SDValue Mask = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Op, Mask,
+ N->getOperand(2));
+}
//===----------------------------------------------------------------------===//
// Vector Widening Utilities
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index cf1b11c14b6d0..88d92b3988dd8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -766,6 +766,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_POPCOUNT, VT, Custom);
setOperationPromotedToType(
ISD::VECTOR_SPLICE, VT,
@@ -1140,6 +1141,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
VT, Custom);
setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_POPCOUNT, VT, Custom);
continue;
}
@@ -6607,6 +6609,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
case ISD::EXPERIMENTAL_VP_REVERSE:
return lowerVPReverseExperimental(Op, DAG);
+ case ISD::EXPERIMENTAL_VP_POPCOUNT:
+ return lowerVPPopcountExperimental(Op, DAG);
}
}
@@ -19744,6 +19748,34 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
return true;
}
+SDValue
+RISCVTargetLowering::lowerVPPopcountExperimental(SDValue N,
+ SelectionDAG &DAG) const {
+ SDValue Op = N.getOperand(0);
+ SDValue Mask = N.getOperand(1);
+ MVT VT = Op.getSimpleValueType();
+ SDLoc DL(N);
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ bool IsUnMasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ Op = convertToScalableVector(ContainerVT, Op, DAG, Subtarget);
+ Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
+ }
+
+ if (IsUnMasked)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Subtarget.getXLenVT(),
+ DAG.getConstant(Intrinsic::riscv_vcpop, DL, XLenVT), Op,
+ N->getOperand(2));
+
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Subtarget.getXLenVT(),
+ DAG.getConstant(Intrinsic::riscv_vcpop_mask, DL, XLenVT),
+ Op, Mask, N->getOperand(2));
+}
+
MachineInstr *
RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
MachineBasicBlock::instr_iterator &MBBI,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 486efeb8339ab..b5074d6a06372 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -905,6 +905,7 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVPPopcountExperimental(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-popcount.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-popcount.ll
new file mode 100644
index 0000000000000..6961f3996b257
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-popcount.ll
@@ -0,0 +1,282 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+
+declare i32 @llvm.experimental.vp.popcount.v1i1(<1 x i1>, <1 x i1>, i32)
+define i32 @vp_popcount_v1i1(<1 x i1> %m, <1 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v1i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.v1i1(<1 x i1> %op, <1 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_v1i1_unmasked(<1 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v1i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <1 x i1> %head, <1 x i1> poison, <1 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.v1i1(<1 x i1> %op, <1 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v2i1(<2 x i1>, <2 x i1>, i32)
+define i32 @vp_popcount_v2i1(<2 x i1> %m, <2 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v2i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.v2i1(<2 x i1> %op, <2 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_v2i1_unmasked(<2 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v2i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.v2i1(<2 x i1> %op, <2 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v4i1(<4 x i1>, <4 x i1>, i32)
+define i32 @vp_popcount_v4i1(<4 x i1> %m, <4 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v4i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.v4i1(<4 x i1> %op, <4 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_v4i1_unmasked(<4 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v4i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.v4i1(<4 x i1> %op, <4 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v8i1(<8 x i1>, <8 x i1>, i32)
+define i32 @vp_popcount_v8i1(<8 x i1> %m, <8 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v8i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.v8i1(<8 x i1> %op, <8 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_v8i1_unmasked(<8 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v8i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.v8i1(<8 x i1> %op, <8 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v16i1(<16 x i1>, <16 x i1>, i32)
+define i32 @vp_popcount_v16i1(<16 x i1> %m, <16 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v16i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.v16i1(<16 x i1> %op, <16 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_v16i1_unmasked(<16 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v16i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.v16i1(<16 x i1> %op, <16 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v32i1(<32 x i1>, <32 x i1>, i32)
+define i32 @vp_popcount_v32i1(<32 x i1> %m, <32 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v32i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.v32i1(<32 x i1> %op, <32 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_v32i1_unmasked(<32 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v32i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.v32i1(<32 x i1> %op, <32 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v63i1(<63 x i1>, <63 x i1>, i32)
+define i32 @vp_popcount_v63i1(<63 x i1> %m, <63 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v63i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.v63i1(<63 x i1> %op, <63 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_v63i1_unmasked(<63 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v63i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <63 x i1> poison, i1 true, i32 0
+ %m = shufflevector <63 x i1> %head, <63 x i1> poison, <63 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.v63i1(<63 x i1> %op, <63 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v64i1(<64 x i1>, <64 x i1>, i32)
+define i32 @vp_popcount_v64i1(<64 x i1> %m, <64 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v64i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.v64i1(<64 x i1> %op, <64 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_v64i1_unmasked(<64 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v64i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <64 x i1> %head, <64 x i1> poison, <64 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.v64i1(<64 x i1> %op, <64 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v128i1(<128 x i1>, <128 x i1>, i32)
+define i32 @vp_popcount_v128i1(<128 x i1> %m, <128 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v128i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.v128i1(<128 x i1> %op, <128 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_v128i1_unmasked(<128 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v128i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <128 x i1> poison, i1 true, i32 0
+ %m = shufflevector <128 x i1> %head, <128 x i1> poison, <128 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.v128i1(<128 x i1> %op, <128 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v256i1(<256 x i1>, <256 x i1>, i32)
+define i32 @vp_popcount_v256i1(<256 x i1> %m, <256 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v256i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB18_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: .LBB18_2: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a1, v9, v0.t
+; CHECK-NEXT: addi a2, a0, -128
+; CHECK-NEXT: sltu a0, a0, a2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vcpop.m a0, v10, v0.t
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.v256i1(<256 x i1> %op, <256 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_v256i1_unmasked(<256 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v256i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB19_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: .LBB19_2: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a1, v0
+; CHECK-NEXT: addi a2, a0, -128
+; CHECK-NEXT: sltu a0, a0, a2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <256 x i1> poison, i1 true, i32 0
+ %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.v256i1(<256 x i1> %op, <256 x i1> %m, i32 %evl)
+ ret i32 %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-popcount.ll b/llvm/test/CodeGen/RISCV/rvv/vp-popcount.ll
new file mode 100644
index 0000000000000..9b53e7e5a8610
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-popcount.ll
@@ -0,0 +1,260 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+
+declare i32 @llvm.experimental.vp.popcount.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, i32)
+define i32 @vp_popcount_nxv1i1(<vscale x 1 x i1> %m, <vscale x 1 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv1i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.nxv1i1(<vscale x 1 x i1> %op, <vscale x 1 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_nxv1i1_unmasked(<vscale x 1 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv1i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.nxv1i1(<vscale x 1 x i1> %op, <vscale x 1 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, i32)
+define i32 @vp_popcount_nxv2i1(<vscale x 2 x i1> %m, <vscale x 2 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv2i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.nxv2i1(<vscale x 2 x i1> %op, <vscale x 2 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_nxv2i1_unmasked(<vscale x 2 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv2i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.nxv2i1(<vscale x 2 x i1> %op, <vscale x 2 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, i32)
+define i32 @vp_popcount_nxv4i1(<vscale x 4 x i1> %m, <vscale x 4 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv4i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.nxv4i1(<vscale x 4 x i1> %op, <vscale x 4 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_nxv4i1_unmasked(<vscale x 4 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv4i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.nxv4i1(<vscale x 4 x i1> %op, <vscale x 4 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, i32)
+define i32 @vp_popcount_nxv8i1(<vscale x 8 x i1> %m, <vscale x 8 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv8i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.nxv8i1(<vscale x 8 x i1> %op, <vscale x 8 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_nxv8i1_unmasked(<vscale x 8 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv8i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.nxv8i1(<vscale x 8 x i1> %op, <vscale x 8 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i32)
+define i32 @vp_popcount_nxv16i1(<vscale x 16 x i1> %m, <vscale x 16 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv16i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.nxv16i1(<vscale x 16 x i1> %op, <vscale x 16 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_nxv16i1_unmasked(<vscale x 16 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv16i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.nxv16i1(<vscale x 16 x i1> %op, <vscale x 16 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, i32)
+define i32 @vp_popcount_nxv32i1(<vscale x 32 x i1> %m, <vscale x 32 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv32i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.nxv32i1(<vscale x 32 x i1> %op, <vscale x 32 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_nxv32i1_unmasked(<vscale x 32 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv32i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.nxv32i1(<vscale x 32 x i1> %op, <vscale x 32 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.nxv63i1(<vscale x 63 x i1>, <vscale x 63 x i1>, i32)
+define i32 @vp_popcount_nxv63i1(<vscale x 63 x i1> %m, <vscale x 63 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv63i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.nxv63i1(<vscale x 63 x i1> %op, <vscale x 63 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_nxv63i1_unmasked(<vscale x 63 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv63i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <vscale x 63 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 63 x i1> %head, <vscale x 63 x i1> poison, <vscale x 63 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.nxv63i1(<vscale x 63 x i1> %op, <vscale x 63 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>, i32)
+define i32 @vp_popcount_nxv64i1(<vscale x 64 x i1> %m, <vscale x 64 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv64i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.nxv64i1(<vscale x 64 x i1> %op, <vscale x 64 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_nxv64i1_unmasked(<vscale x 64 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv64i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.nxv64i1(<vscale x 64 x i1> %op, <vscale x 64 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.nxv128i1(<vscale x 128 x i1>, <vscale x 128 x i1>, i32)
+define i32 @vp_popcount_nxv128i1(<vscale x 128 x i1> %m, <vscale x 128 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv128i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: vcpop.m a2, v10, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB16_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB16_2: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vcpop.m a0, v9, v0.t
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: ret
+entry:
+ %a = call i32 @llvm.experimental.vp.popcount.nxv128i1(<vscale x 128 x i1> %op, <vscale x 128 x i1> %m, i32 %evl)
+ ret i32 %a
+}
+
+define i32 @vp_popcount_nxv128i1_unmasked(<vscale x 128 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_nxv128i1_unmasked:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a2, v8
+; CHECK-NEXT: bltu a0, a1, .LBB17_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB17_2: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: ret
+entry:
+ %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer
+ %a = call i32 @llvm.experimental.vp.popcount.nxv128i1(<vscale x 128 x i1> %op, <vscale x 128 x i1> %m, i32 %evl)
+ ret i32 %a
+}
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
index a3bef3d42adb0..e4545ac0e3389 100644
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -150,6 +150,8 @@ class VPIntrinsicTest : public testing::Test {
Str << " declare <8 x i32> @llvm.experimental.vp.reverse.v8i32(<8 x i32>, "
"<8 x i1>, i32) ";
+ Str << " declare i32 @llvm.experimental.vp.popcount.v8i1(<8 x i1>, "
+ "<8 x i1>, i32) ";
Str << " declare <8 x i16> @llvm.vp.abs.v8i16"
<< "(<8 x i16>, i1 immarg, <8 x i1>, i32) ";
Str << " declare <8 x i16> @llvm.vp.bitreverse.v8i16"
>From b62400852e18bd88e46ccc6b82161c309da518c0 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 4 Dec 2023 19:22:03 -0800
Subject: [PATCH 2/5] Use VCPOP_VL to avoid checking use of mask.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 88d92b3988dd8..e155bfa964973 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19766,14 +19766,8 @@ RISCVTargetLowering::lowerVPPopcountExperimental(SDValue N,
Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
}
- if (IsUnMasked)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Subtarget.getXLenVT(),
- DAG.getConstant(Intrinsic::riscv_vcpop, DL, XLenVT), Op,
- N->getOperand(2));
-
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Subtarget.getXLenVT(),
- DAG.getConstant(Intrinsic::riscv_vcpop_mask, DL, XLenVT),
- Op, Mask, N->getOperand(2));
+ return DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Op, Mask,
+ N->getOperand(2));
}
MachineInstr *
>From 653775119e07297a8a92a110d42376c06ea76f73 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 4 Dec 2023 19:29:19 -0800
Subject: [PATCH 3/5] Refine llvm/docs/LangRef.rst
---
llvm/docs/LangRef.rst | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index e346f19c37010..76de22e9174c7 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -21791,9 +21791,8 @@ The third argument is the explicit vector length of the operation.
Semantics:
""""""""""
-This intrinsic population counts first ``evl`` elements in a vector.
-The lanes in the result vector disabled by ``mask`` are ``poison``. The
-elements past ``evl`` are poison.
+This intrinsic population counts first ``evl`` elements actived by ``mask`` in a
+vector.
.. _int_vp_load:
>From d9c48e2f2bcb0bd81b27754d54d28a48c43445a3 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Wed, 6 Dec 2023 16:37:22 +0800
Subject: [PATCH 4/5] Remove unused variable
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e155bfa964973..694b7fa55c916 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19757,8 +19757,6 @@ RISCVTargetLowering::lowerVPPopcountExperimental(SDValue N,
SDLoc DL(N);
MVT XLenVT = Subtarget.getXLenVT();
- bool IsUnMasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
-
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
>From 1bb845a2649269749e25f6787ad3840e963b873f Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Wed, 6 Dec 2023 16:39:08 +0800
Subject: [PATCH 5/5] Address comment.
---
llvm/docs/LangRef.rst | 2 +-
llvm/include/llvm/IR/VPIntrinsics.def | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 76de22e9174c7..3a9866492565c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -21791,7 +21791,7 @@ The third argument is the explicit vector length of the operation.
Semantics:
""""""""""
-This intrinsic population counts first ``evl`` elements actived by ``mask`` in a
+This intrinsic population counts first ``evl`` elements activated by ``mask`` in a
vector.
.. _int_vp_load:
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index cb776419c9adf..08e8d8cc83a09 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -713,7 +713,7 @@ END_REGISTER_VP(experimental_vp_reverse, EXPERIMENTAL_VP_REVERSE)
// llvm.experimental.vp.popcount(x,mask,vlen)
BEGIN_REGISTER_VP(experimental_vp_popcount, 1, 2,
EXPERIMENTAL_VP_POPCOUNT, 0)
-VP_PROPERTY_FUNCTIONAL_INTRINSIC(experimental_vp_popcount)
+VP_PROPERTY_NO_FUNCTIONAL
END_REGISTER_VP(experimental_vp_popcount, EXPERIMENTAL_VP_POPCOUNT)
#undef BEGIN_REGISTER_VP
More information about the llvm-commits mailing list