[llvm] [VP][RISCV] Introduce experimental.vp.popcount and RISC-V support. (PR #74294)

Mon Dec 4 01:32:54 PST 2023

llvmbot wrote:




@llvm/pr-subscribers-llvm-selectiondag

Author: Yeting Kuo (yetingk)

<details>
<summary>Changes</summary>

This is similar to vp.ctpop. But this is counts the whole source mask and vp.ctpop counts each lane of its source.

---

Patch is 34.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74294.diff


11 Files Affected:

- (modified) llvm/docs/LangRef.rst (+34) 
- (modified) llvm/include/llvm/IR/Intrinsics.td (+4) 
- (modified) llvm/include/llvm/IR/VPIntrinsics.def (+6) 
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (+10) 
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (+3) 
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+38) 
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+32) 
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.h (+1) 
- (added) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-popcount.ll (+282) 
- (added) llvm/test/CodeGen/RISCV/rvv/vp-popcount.ll (+260) 
- (modified) llvm/unittests/IR/VPIntrinsicTest.cpp (+2) 


``````````diff

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index af064d7ac2195..e346f19c37010 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -21761,6 +21761,40 @@ This intrinsic reverses the order of the first ``evl`` elements in a vector.
 The lanes in the result vector disabled by ``mask`` are ``poison``. The
 elements past ``evl`` are poison.
 
+
+.. _int_experimental_vp_popcount:
+
+
+'``llvm.experimental.vp.popcount``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare i32 @llvm.experimental.vp.popcount.v2i1(<2 x i1> %vec, <2 x i1> %mask, i32 %evl)
+      declare i32 @llvm.experimental.vp.popcount.nxv2i1(<vscale x 4 x i1> %vec, <vscale x 4 x i1> %mask, i32 %evl)
+
+Overview:
+"""""""""
+
+Predicated population count of a vector mask.
+
+Arguments:
+""""""""""
+
+The first and second argument are vector masks and have same number of elements.
+The third argument is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+This intrinsic population counts first ``evl`` elements in a vector.
+The lanes in the result vector disabled by ``mask`` are ``poison``. The
+elements past ``evl`` are poison.
+
 .. _int_vp_load:
 
 '``llvm.vp.load``' Intrinsic
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 060e964f77bf7..c8e83ede1f53b 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2162,6 +2162,10 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
                                llvm_anyvector_ty,
                                LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                llvm_i32_ty]>;
+  def int_experimental_vp_popcount: DefaultAttrsIntrinsic<[llvm_i32_ty],
+                             [ llvm_anyvector_ty,
+                               LLVMMatchType<0>,
+                               llvm_i32_ty]>;
 }
 
 let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn, ImmArg<ArgIndex<1>>] in {
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 671dc39db26df..cb776419c9adf 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -710,6 +710,12 @@ END_REGISTER_VP(experimental_vp_reverse, EXPERIMENTAL_VP_REVERSE)
 
 ///// } Shuffles
 
+// llvm.experimental.vp.popcount(x,mask,vlen)
+BEGIN_REGISTER_VP(experimental_vp_popcount, 1, 2,
+                  EXPERIMENTAL_VP_POPCOUNT, 0)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(experimental_vp_popcount)
+END_REGISTER_VP(experimental_vp_popcount, EXPERIMENTAL_VP_POPCOUNT)
+
 #undef BEGIN_REGISTER_VP
 #undef BEGIN_REGISTER_VP_INTRINSIC
 #undef BEGIN_REGISTER_VP_SDNODE
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 54698edce7d6f..8c478b05eef79 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -305,6 +305,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::LLRINT:
     Res = PromoteIntRes_XRINT(N);
     break;
+
+  case ISD::EXPERIMENTAL_VP_POPCOUNT:
+    Res = PromoteIntRes_VP_POPCOUNT(N);
+    break;
   }
 
   // If the result is null then the sub-method took care of registering it.
@@ -5882,6 +5886,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
   return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_VP_POPCOUNT(SDNode *N) {
+  SDLoc dl(N);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  return DAG.getNode(N->getOpcode(), dl, NVT, N->ops());
+}
+
 SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) {
   SDLoc dl(N);
   // The result type is equal to the first input operand's type, so the
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index e9bd54089d062..74a44e6177383 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -365,6 +365,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue PromoteIntRes_FunnelShift(SDNode *N);
   SDValue PromoteIntRes_VPFunnelShift(SDNode *N);
   SDValue PromoteIntRes_IS_FPCLASS(SDNode *N);
+  SDValue PromoteIntRes_VP_POPCOUNT(SDNode *N);
 
   // Integer Operand Promotion.
   bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -916,6 +917,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SplitVecOp_FP_ROUND(SDNode *N);
   SDValue SplitVecOp_FPOpDifferentTypes(SDNode *N);
   SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N);
+  SDValue SplitVecOp_VP_POPCOUNT(SDNode *N, unsigned OpNo);
 
   //===--------------------------------------------------------------------===//
   // Vector Widening Support: LegalizeVectorTypes.cpp
@@ -1023,6 +1025,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
   SDValue WidenVecOp_VP_REDUCE(SDNode *N);
   SDValue WidenVecOp_ExpOp(SDNode *N);
+  SDValue WidenVecOp_VP_POPCOUNT(SDNode *N);
 
   /// Helper function to generate a set of operations to perform
   /// a vector operation for a wider type.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 66461b26468f7..840338e16f683 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3085,6 +3085,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::VP_REDUCE_FMIN:
     Res = SplitVecOp_VP_REDUCE(N, OpNo);
     break;
+  case ISD::EXPERIMENTAL_VP_POPCOUNT:
+    Res = SplitVecOp_VP_POPCOUNT(N, OpNo);
+    break;
   }
 
   // If the result is null, the sub-method took care of registering results etc.
@@ -4031,6 +4034,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
 }
 
+SDValue DAGTypeLegalizer::SplitVecOp_VP_POPCOUNT(SDNode *N, unsigned OpNo) {
+  assert(N->isVPOpcode() && "Expected VP opcode");
+  assert(OpNo == 0 && "Can only split first operand");
+
+  unsigned Opc = N->getOpcode();
+  EVT ResVT = N->getValueType(0);
+  SDValue Lo, Hi;
+  SDLoc dl(N);
+
+  SDValue VecOp = N->getOperand(OpNo);
+  EVT VecVT = VecOp.getValueType();
+  assert(VecVT.isVector() && "Can only split reduce vector operand");
+  GetSplitVector(VecOp, Lo, Hi);
+
+  SDValue MaskLo, MaskHi;
+  std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+
+  SDValue EVLLo, EVLHi;
+  std::tie(EVLLo, EVLHi) = DAG.SplitEVL(N->getOperand(2), VecVT, dl);
+
+  SDValue ResLo = DAG.getNode(Opc, dl, ResVT, {Lo, MaskLo, EVLLo});
+  SDValue ResHi = DAG.getNode(Opc, dl, ResVT, {Hi, MaskHi, EVLHi});
+  return DAG.getNode(ISD::ADD, dl, ResVT, ResLo, ResHi);
+}
+
 //===----------------------------------------------------------------------===//
 //  Result Vector Widening
 //===----------------------------------------------------------------------===//
@@ -6120,6 +6148,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::VP_REDUCE_FMIN:
     Res = WidenVecOp_VP_REDUCE(N);
     break;
+  case ISD::EXPERIMENTAL_VP_POPCOUNT:
+    Res = WidenVecOp_VP_POPCOUNT(N);
+    break;
   }
 
   // If Res is null, the sub-method took care of registering the result.
@@ -6883,6 +6914,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
                      DAG.getVectorIdxConstant(0, DL));
 }
 
+SDValue DAGTypeLegalizer::WidenVecOp_VP_POPCOUNT(SDNode *N) {
+  EVT ResVT = N->getValueType(0);
+  SDValue Op = GetWidenedVector(N->getOperand(0));
+  SDValue Mask = GetWidenedVector(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Op, Mask,
+                     N->getOperand(2));
+}
 //===----------------------------------------------------------------------===//
 // Vector Widening Utilities
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index cf1b11c14b6d0..88d92b3988dd8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -766,6 +766,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
 
       setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+      setOperationAction(ISD::EXPERIMENTAL_VP_POPCOUNT, VT, Custom);
 
       setOperationPromotedToType(
           ISD::VECTOR_SPLICE, VT,
@@ -1140,6 +1141,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                              VT, Custom);
 
           setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+          setOperationAction(ISD::EXPERIMENTAL_VP_POPCOUNT, VT, Custom);
           continue;
         }
 
@@ -6607,6 +6609,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
   case ISD::EXPERIMENTAL_VP_REVERSE:
     return lowerVPReverseExperimental(Op, DAG);
+  case ISD::EXPERIMENTAL_VP_POPCOUNT:
+    return lowerVPPopcountExperimental(Op, DAG);
   }
 }
 
@@ -19744,6 +19748,34 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
   return true;
 }
 
+SDValue
+RISCVTargetLowering::lowerVPPopcountExperimental(SDValue N,
+                                                 SelectionDAG &DAG) const {
+  SDValue Op = N.getOperand(0);
+  SDValue Mask = N.getOperand(1);
+  MVT VT = Op.getSimpleValueType();
+  SDLoc DL(N);
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  bool IsUnMasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
+  MVT ContainerVT = VT;
+  if (VT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(VT);
+    Op = convertToScalableVector(ContainerVT, Op, DAG, Subtarget);
+    Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
+  }
+
+  if (IsUnMasked)
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Subtarget.getXLenVT(),
+                       DAG.getConstant(Intrinsic::riscv_vcpop, DL, XLenVT), Op,
+                       N->getOperand(2));
+
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Subtarget.getXLenVT(),
+                     DAG.getConstant(Intrinsic::riscv_vcpop_mask, DL, XLenVT),
+                     Op, Mask, N->getOperand(2));
+}
+
 MachineInstr *
 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
                                    MachineBasicBlock::instr_iterator &MBBI,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 486efeb8339ab..b5074d6a06372 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -905,6 +905,7 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVPPopcountExperimental(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-popcount.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-popcount.ll
new file mode 100644
index 0000000000000..6961f3996b257
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-popcount.ll
@@ -0,0 +1,282 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
+
+declare i32 @llvm.experimental.vp.popcount.v1i1(<1 x i1>, <1 x i1>, i32)
+define i32 @vp_popcount_v1i1(<1 x i1> %m, <1 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v1i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call i32 @llvm.experimental.vp.popcount.v1i1(<1 x i1> %op, <1 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+define i32 @vp_popcount_v1i1_unmasked(<1 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v1i1_unmasked:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %head = insertelement <1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <1 x i1> %head, <1 x i1> poison, <1 x i32> zeroinitializer
+  %a = call i32 @llvm.experimental.vp.popcount.v1i1(<1 x i1> %op, <1 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v2i1(<2 x i1>, <2 x i1>, i32)
+define i32 @vp_popcount_v2i1(<2 x i1> %m, <2 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v2i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call i32 @llvm.experimental.vp.popcount.v2i1(<2 x i1> %op, <2 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+define i32 @vp_popcount_v2i1_unmasked(<2 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v2i1_unmasked:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %a = call i32 @llvm.experimental.vp.popcount.v2i1(<2 x i1> %op, <2 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v4i1(<4 x i1>, <4 x i1>, i32)
+define i32 @vp_popcount_v4i1(<4 x i1> %m, <4 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v4i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call i32 @llvm.experimental.vp.popcount.v4i1(<4 x i1> %op, <4 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+define i32 @vp_popcount_v4i1_unmasked(<4 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v4i1_unmasked:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %a = call i32 @llvm.experimental.vp.popcount.v4i1(<4 x i1> %op, <4 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v8i1(<8 x i1>, <8 x i1>, i32)
+define i32 @vp_popcount_v8i1(<8 x i1> %m, <8 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v8i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call i32 @llvm.experimental.vp.popcount.v8i1(<8 x i1> %op, <8 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+define i32 @vp_popcount_v8i1_unmasked(<8 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v8i1_unmasked:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call i32 @llvm.experimental.vp.popcount.v8i1(<8 x i1> %op, <8 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v16i1(<16 x i1>, <16 x i1>, i32)
+define i32 @vp_popcount_v16i1(<16 x i1> %m, <16 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v16i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call i32 @llvm.experimental.vp.popcount.v16i1(<16 x i1> %op, <16 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+define i32 @vp_popcount_v16i1_unmasked(<16 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v16i1_unmasked:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %a = call i32 @llvm.experimental.vp.popcount.v16i1(<16 x i1> %op, <16 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v32i1(<32 x i1>, <32 x i1>, i32)
+define i32 @vp_popcount_v32i1(<32 x i1> %m, <32 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v32i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call i32 @llvm.experimental.vp.popcount.v32i1(<32 x i1> %op, <32 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+define i32 @vp_popcount_v32i1_unmasked(<32 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v32i1_unmasked:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %head = insertelement <32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
+  %a = call i32 @llvm.experimental.vp.popcount.v32i1(<32 x i1> %op, <32 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v63i1(<63 x i1>, <63 x i1>, i32)
+define i32 @vp_popcount_v63i1(<63 x i1> %m, <63 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v63i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call i32 @llvm.experimental.vp.popcount.v63i1(<63 x i1> %op, <63 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+define i32 @vp_popcount_v63i1_unmasked(<63 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v63i1_unmasked:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %head = insertelement <63 x i1> poison, i1 true, i32 0
+  %m = shufflevector <63 x i1> %head, <63 x i1> poison, <63 x i32> zeroinitializer
+  %a = call i32 @llvm.experimental.vp.popcount.v63i1(<63 x i1> %op, <63 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v64i1(<64 x i1>, <64 x i1>, i32)
+define i32 @vp_popcount_v64i1(<64 x i1> %m, <64 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v64i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call i32 @llvm.experimental.vp.popcount.v64i1(<64 x i1> %op, <64 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+define i32 @vp_popcount_v64i1_unmasked(<64 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v64i1_unmasked:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %head = insertelement <64 x i1> poison, i1 true, i32 0
+  %m = shufflevector <64 x i1> %head, <64 x i1> poison, <64 x i32> zeroinitializer
+  %a = call i32 @llvm.experimental.vp.popcount.v64i1(<64 x i1> %op, <64 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+declare i32 @llvm.experimental.vp.popcount.v128i1(<128 x i1>, <128 x i1>, i32)
+define i32 @vp_popcount_v128i1(<128 x i1> %m, <128 x i1> %op, i32 zeroext %evl) {
+; CHECK-LABEL: vp_popcount_v128i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call i32 @llvm.experimental.vp.popcount.v128i1(<128 x i1> %op, <128 x i1> %m, i32 %evl)
+  ret i32 %a
+}
+
+define i32 @vp_popcount_v128i1_unmasked(<128...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/74294