[llvm] b5bcd4f - [RISCV] Add VL nodes and VP patterns for unary zvbb instructions

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 17 01:18:05 PDT 2023


Author: Luke Lau
Date: 2023-07-17T09:17:58+01:00
New Revision: b5bcd4f60ba3cbed05459e11d176a3f194145732

URL: https://github.com/llvm/llvm-project/commit/b5bcd4f60ba3cbed05459e11d176a3f194145732
DIFF: https://github.com/llvm/llvm-project/commit/b5bcd4f60ba3cbed05459e11d176a3f194145732.diff

LOG: [RISCV] Add VL nodes and VP patterns for unary zvbb instructions

This follows the pattern of lowering VP nodes to equivalent
RISCVISD::*_VL nodes. The nodes are modelled after the VP ISD nodes rather
than the actual zvbb instructions, and I've included a merge operand to be
consistent with the underlying pseudos that were recently refactored.
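
For example, with these patterns in place a masked @llvm.vp.bitreverse call now
selects a single masked vbrev.v instead of going through the generic
shift-and-or expansion. A minimal sketch, following the nxv1i8 case in the
updated bitreverse-vp.ll test (the zeroext attribute on %evl is assumed from
the usual VP test signatures):

  declare <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)

  define <vscale x 1 x i8> @vp_bitreverse_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
    %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
    ret <vscale x 1 x i8> %v
  }

  ; With -mattr=+v,+experimental-zvbb this is expected to lower to:
  ;   vsetvli zero, a0, e8, mf8, ta, ma
  ;   vbrev.v v8, v8, v0.t
  ;   ret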

I've defined the nodes in RISCVInstrInfoVVLPatterns.td since they aren't
Zvk-specific, but the patterns live in RISCVInstrInfoZvk.td.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D155229

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
    llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
    llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
    llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
    llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
    llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
    llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c8f3dffdeb4a3c..58ed2f9dad2181 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -681,11 +681,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
       setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
 
-      setOperationAction({ISD::VP_BSWAP, ISD::VP_BITREVERSE}, VT, Expand);
       setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand);
-      setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
-                          ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
-                         VT, Expand);
 
       // Custom-lower extensions and truncations from/to mask types.
       setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
@@ -753,9 +749,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
       if (Subtarget.hasStdExtZvbb()) {
         setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Legal);
+        setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Custom);
+        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
+                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
+                           VT, Custom);
       } else {
         setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Expand);
-        setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, VT, Expand);
+        setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Expand);
+        setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
+        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
+                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
+                           VT, Expand);
 
         // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
         // range of f32.
@@ -5430,6 +5434,18 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerVPOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true);
   case ISD::VP_UMAX:
     return lowerVPOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true);
+  case ISD::VP_BITREVERSE:
+    return lowerVPOp(Op, DAG, RISCVISD::BITREVERSE_VL, /*HasMergeOp*/ true);
+  case ISD::VP_BSWAP:
+    return lowerVPOp(Op, DAG, RISCVISD::BSWAP_VL, /*HasMergeOp*/ true);
+  case ISD::VP_CTLZ:
+  case ISD::VP_CTLZ_ZERO_UNDEF:
+    return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true);
+  case ISD::VP_CTTZ:
+  case ISD::VP_CTTZ_ZERO_UNDEF:
+    return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, /*HasMergeOp*/ true);
+  case ISD::VP_CTPOP:
+    return lowerVPOp(Op, DAG, RISCVISD::CTPOP_VL, /*HasMergeOp*/ true);
   case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
     return lowerVPStridedLoad(Op, DAG);
   case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
@@ -16054,6 +16070,11 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(SMAX_VL)
   NODE_NAME_CASE(UMIN_VL)
   NODE_NAME_CASE(UMAX_VL)
+  NODE_NAME_CASE(BITREVERSE_VL)
+  NODE_NAME_CASE(BSWAP_VL)
+  NODE_NAME_CASE(CTLZ_VL)
+  NODE_NAME_CASE(CTTZ_VL)
+  NODE_NAME_CASE(CTPOP_VL)
   NODE_NAME_CASE(FMINNUM_VL)
   NODE_NAME_CASE(FMAXNUM_VL)
   NODE_NAME_CASE(MULHS_VL)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 03bc1d8471558b..24b73656e5a623 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -231,6 +231,12 @@ enum NodeType : unsigned {
   UMIN_VL,
   UMAX_VL,
 
+  BITREVERSE_VL,
+  BSWAP_VL,
+  CTLZ_VL,
+  CTTZ_VL,
+  CTPOP_VL,
+
   SADDSAT_VL,
   UADDSAT_VL,
   SSUBSAT_VL,

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index ab38b1b65f6f8b..93647d8f7f8792 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -21,6 +21,13 @@
 // Helpers to define the VL patterns.
 //===----------------------------------------------------------------------===//
 
+def SDT_RISCVIntUnOp_VL : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+                                               SDTCisSameAs<0, 2>,
+                                               SDTCisVec<0>, SDTCisInt<0>,
+                                               SDTCVecEltisVT<3, i1>,
+                                               SDTCisSameNumEltsAs<0, 3>,
+                                               SDTCisVT<4, XLenVT>]>;
+
 def SDT_RISCVIntBinOp_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
                                                 SDTCisSameAs<0, 2>,
                                                 SDTCisVec<0>, SDTCisInt<0>,
@@ -96,6 +103,12 @@ def riscv_smax_vl  : SDNode<"RISCVISD::SMAX_VL",  SDT_RISCVIntBinOp_VL, [SDNPCom
 def riscv_umin_vl  : SDNode<"RISCVISD::UMIN_VL",  SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 def riscv_umax_vl  : SDNode<"RISCVISD::UMAX_VL",  SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 
+def riscv_bitreverse_vl : SDNode<"RISCVISD::BITREVERSE_VL", SDT_RISCVIntUnOp_VL>;
+def riscv_bswap_vl      : SDNode<"RISCVISD::BSWAP_VL",      SDT_RISCVIntUnOp_VL>;
+def riscv_ctlz_vl       : SDNode<"RISCVISD::CTLZ_VL",       SDT_RISCVIntUnOp_VL>;
+def riscv_cttz_vl       : SDNode<"RISCVISD::CTTZ_VL",       SDT_RISCVIntUnOp_VL>;
+def riscv_ctpop_vl      : SDNode<"RISCVISD::CTPOP_VL",      SDT_RISCVIntUnOp_VL>;
+
 def riscv_saddsat_vl   : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 def riscv_uaddsat_vl   : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 def riscv_ssubsat_vl   : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index 297b150833f129..89871278296fcc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -220,3 +220,32 @@ defm : VPatUnarySDNode_V<bswap, "PseudoVREV8">;
 defm : VPatUnarySDNode_V<ctlz, "PseudoVCLZ">;
 defm : VPatUnarySDNode_V<cttz, "PseudoVCTZ">;
 defm : VPatUnarySDNode_V<ctpop, "PseudoVCPOP">;
+
+//===----------------------------------------------------------------------===//
+// VL patterns
+//===----------------------------------------------------------------------===//
+
+multiclass VPatUnaryVL_V<SDPatternOperator op, string instruction_name> {
+  foreach vti = AllIntegerVectors in {
+    let Predicates = !listconcat([HasStdExtZvbb],
+                                 GetVTypePredicates<vti>.Predicates) in {
+      def : Pat<(vti.Vector (op (vti.Vector vti.RegClass:$rs1),
+                                (vti.Vector vti.RegClass:$merge),
+                                (vti.Mask V0),
+                                VLOpFrag)),
+                (!cast<Instruction>(instruction_name#"_V_"#vti.LMul.MX#"_MASK")
+                   vti.RegClass:$merge,
+                   vti.RegClass:$rs1,
+                   (vti.Mask V0),
+                   GPR:$vl,
+                   vti.Log2SEW,
+                   TAIL_AGNOSTIC)>;
+    }
+  }
+}
+
+defm : VPatUnaryVL_V<riscv_bitreverse_vl, "PseudoVBREV">;
+defm : VPatUnaryVL_V<riscv_bswap_vl, "PseudoVREV8">;
+defm : VPatUnaryVL_V<riscv_ctlz_vl, "PseudoVCLZ">;
+defm : VPatUnaryVL_V<riscv_cttz_vl, "PseudoVCTZ">;
+defm : VPatUnaryVL_V<riscv_ctpop_vl, "PseudoVCPOP">;

diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
index 4dd9a516352111..05b56d6c92ce3f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -3,6 +3,10 @@
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb,+m -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb,+m -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
 
 declare <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
 
@@ -28,6 +32,12 @@ define <vscale x 1 x i8> @vp_bitreverse_nxv1i8(<vscale x 1 x i8> %va, <vscale x
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i8> %v
 }
@@ -54,6 +64,12 @@ define <vscale x 1 x i8> @vp_bitreverse_nxv1i8_unmasked(<vscale x 1 x i8> %va, i
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vor.vv v8, v9, v8
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -84,6 +100,12 @@ define <vscale x 2 x i8> @vp_bitreverse_nxv2i8(<vscale x 2 x i8> %va, <vscale x
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i8> %v
 }
@@ -110,6 +132,12 @@ define <vscale x 2 x i8> @vp_bitreverse_nxv2i8_unmasked(<vscale x 2 x i8> %va, i
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vor.vv v8, v9, v8
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -140,6 +168,12 @@ define <vscale x 4 x i8> @vp_bitreverse_nxv4i8(<vscale x 4 x i8> %va, <vscale x
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i8> %v
 }
@@ -166,6 +200,12 @@ define <vscale x 4 x i8> @vp_bitreverse_nxv4i8_unmasked(<vscale x 4 x i8> %va, i
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vor.vv v8, v9, v8
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -196,6 +236,12 @@ define <vscale x 8 x i8> @vp_bitreverse_nxv8i8(<vscale x 8 x i8> %va, <vscale x
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i8> %v
 }
@@ -222,6 +268,12 @@ define <vscale x 8 x i8> @vp_bitreverse_nxv8i8_unmasked(<vscale x 8 x i8> %va, i
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vor.vv v8, v9, v8
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -252,6 +304,12 @@ define <vscale x 16 x i8> @vp_bitreverse_nxv16i8(<vscale x 16 x i8> %va, <vscale
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i8> %v
 }
@@ -278,6 +336,12 @@ define <vscale x 16 x i8> @vp_bitreverse_nxv16i8_unmasked(<vscale x 16 x i8> %va
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vor.vv v8, v10, v8
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
@@ -308,6 +372,12 @@ define <vscale x 32 x i8> @vp_bitreverse_nxv32i8(<vscale x 32 x i8> %va, <vscale
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i8> %v
 }
@@ -334,6 +404,12 @@ define <vscale x 32 x i8> @vp_bitreverse_nxv32i8_unmasked(<vscale x 32 x i8> %va
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vor.vv v8, v12, v8
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
@@ -364,6 +440,12 @@ define <vscale x 64 x i8> @vp_bitreverse_nxv64i8(<vscale x 64 x i8> %va, <vscale
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
   ret <vscale x 64 x i8> %v
 }
@@ -390,6 +472,12 @@ define <vscale x 64 x i8> @vp_bitreverse_nxv64i8_unmasked(<vscale x 64 x i8> %va
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vor.vv v8, v16, v8
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
   %v = call <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
@@ -456,6 +544,12 @@ define <vscale x 1 x i16> @vp_bitreverse_nxv1i16(<vscale x 1 x i16> %va, <vscale
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i16> %v
 }
@@ -518,6 +612,12 @@ define <vscale x 1 x i16> @vp_bitreverse_nxv1i16_unmasked(<vscale x 1 x i16> %va
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v9, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -584,6 +684,12 @@ define <vscale x 2 x i16> @vp_bitreverse_nxv2i16(<vscale x 2 x i16> %va, <vscale
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i16> %v
 }
@@ -646,6 +752,12 @@ define <vscale x 2 x i16> @vp_bitreverse_nxv2i16_unmasked(<vscale x 2 x i16> %va
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v9, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -712,6 +824,12 @@ define <vscale x 4 x i16> @vp_bitreverse_nxv4i16(<vscale x 4 x i16> %va, <vscale
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i16> %v
 }
@@ -774,6 +892,12 @@ define <vscale x 4 x i16> @vp_bitreverse_nxv4i16_unmasked(<vscale x 4 x i16> %va
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v9, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -840,6 +964,12 @@ define <vscale x 8 x i16> @vp_bitreverse_nxv8i16(<vscale x 8 x i16> %va, <vscale
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i16> %v
 }
@@ -902,6 +1032,12 @@ define <vscale x 8 x i16> @vp_bitreverse_nxv8i16_unmasked(<vscale x 8 x i16> %va
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v10, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -968,6 +1104,12 @@ define <vscale x 16 x i16> @vp_bitreverse_nxv16i16(<vscale x 16 x i16> %va, <vsc
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i16> %v
 }
@@ -1030,6 +1172,12 @@ define <vscale x 16 x i16> @vp_bitreverse_nxv16i16_unmasked(<vscale x 16 x i16>
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v12, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
@@ -1096,6 +1244,12 @@ define <vscale x 32 x i16> @vp_bitreverse_nxv32i16(<vscale x 32 x i16> %va, <vsc
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i16> %v
 }
@@ -1158,6 +1312,12 @@ define <vscale x 32 x i16> @vp_bitreverse_nxv32i16_unmasked(<vscale x 32 x i16>
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v16, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
@@ -1240,6 +1400,12 @@ define <vscale x 1 x i32> @vp_bitreverse_nxv1i32(<vscale x 1 x i32> %va, <vscale
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i32> %v
 }
@@ -1318,6 +1484,12 @@ define <vscale x 1 x i32> @vp_bitreverse_nxv1i32_unmasked(<vscale x 1 x i32> %va
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v9, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -1400,6 +1572,12 @@ define <vscale x 2 x i32> @vp_bitreverse_nxv2i32(<vscale x 2 x i32> %va, <vscale
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i32> %v
 }
@@ -1478,6 +1656,12 @@ define <vscale x 2 x i32> @vp_bitreverse_nxv2i32_unmasked(<vscale x 2 x i32> %va
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v9, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -1560,6 +1744,12 @@ define <vscale x 4 x i32> @vp_bitreverse_nxv4i32(<vscale x 4 x i32> %va, <vscale
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i32> %v
 }
@@ -1638,6 +1828,12 @@ define <vscale x 4 x i32> @vp_bitreverse_nxv4i32_unmasked(<vscale x 4 x i32> %va
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v10, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -1720,6 +1916,12 @@ define <vscale x 8 x i32> @vp_bitreverse_nxv8i32(<vscale x 8 x i32> %va, <vscale
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i32> %v
 }
@@ -1798,6 +2000,12 @@ define <vscale x 8 x i32> @vp_bitreverse_nxv8i32_unmasked(<vscale x 8 x i32> %va
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v12, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -1880,6 +2088,12 @@ define <vscale x 16 x i32> @vp_bitreverse_nxv16i32(<vscale x 16 x i32> %va, <vsc
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i32> %v
 }
@@ -1958,6 +2172,12 @@ define <vscale x 16 x i32> @vp_bitreverse_nxv16i32_unmasked(<vscale x 16 x i32>
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v16, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
@@ -2106,6 +2326,12 @@ define <vscale x 1 x i64> @vp_bitreverse_nxv1i64(<vscale x 1 x i64> %va, <vscale
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i64> %v
 }
@@ -2250,6 +2476,12 @@ define <vscale x 1 x i64> @vp_bitreverse_nxv1i64_unmasked(<vscale x 1 x i64> %va
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v9, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -2398,6 +2630,12 @@ define <vscale x 2 x i64> @vp_bitreverse_nxv2i64(<vscale x 2 x i64> %va, <vscale
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %v
 }
@@ -2542,6 +2780,12 @@ define <vscale x 2 x i64> @vp_bitreverse_nxv2i64_unmasked(<vscale x 2 x i64> %va
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v10, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -2690,6 +2934,12 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64(<vscale x 4 x i64> %va, <vscale
 ; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i64> %v
 }
@@ -2834,6 +3084,12 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64_unmasked(<vscale x 4 x i64> %va
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v12, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -3046,6 +3302,12 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv7i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 7 x i64> @llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x i64> %v
 }
@@ -3204,6 +3466,12 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64_unmasked(<vscale x 7 x i64> %va
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v16, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv7i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x i64> @llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
@@ -3416,6 +3684,12 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i64> %v
 }
@@ -3574,6 +3848,12 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64_unmasked(<vscale x 8 x i64> %va
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v16, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -3757,6 +4037,29 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vmv1r.v v24, v0
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    srli a2, a1, 1
+; CHECK-ZVBB-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-ZVBB-NEXT:    slli a1, a1, 2
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v16, v16, v0.t
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB46_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB46_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vmv1r.v v0, v24
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)
   ret <vscale x 64 x i16> %v
 }
@@ -3877,6 +4180,24 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16_unmasked(<vscale x 64 x i16>
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vor.vv v8, v24, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    slli a1, a1, 2
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v16, v16
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB47_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB47_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
   %v = call <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)

diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
index 171cb8e256ad2c..5ba354887bad66 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
@@ -3,6 +3,10 @@
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb,+m -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb,+m -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
 
 declare <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
 
@@ -14,6 +18,12 @@ define <vscale x 1 x i16> @vp_bswap_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1
 ; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
 ; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv1i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i16> %v
 }
@@ -26,6 +36,12 @@ define <vscale x 1 x i16> @vp_bswap_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32
 ; CHECK-NEXT:    vsll.vi v8, v8, 8
 ; CHECK-NEXT:    vor.vv v8, v8, v9
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv1i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -42,6 +58,12 @@ define <vscale x 2 x i16> @vp_bswap_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2
 ; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
 ; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv2i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i16> %v
 }
@@ -54,6 +76,12 @@ define <vscale x 2 x i16> @vp_bswap_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32
 ; CHECK-NEXT:    vsll.vi v8, v8, 8
 ; CHECK-NEXT:    vor.vv v8, v8, v9
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv2i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -70,6 +98,12 @@ define <vscale x 4 x i16> @vp_bswap_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4
 ; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
 ; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv4i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i16> %v
 }
@@ -82,6 +116,12 @@ define <vscale x 4 x i16> @vp_bswap_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32
 ; CHECK-NEXT:    vsll.vi v8, v8, 8
 ; CHECK-NEXT:    vor.vv v8, v8, v9
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv4i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -98,6 +138,12 @@ define <vscale x 8 x i16> @vp_bswap_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8
 ; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
 ; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv8i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i16> %v
 }
@@ -110,6 +156,12 @@ define <vscale x 8 x i16> @vp_bswap_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32
 ; CHECK-NEXT:    vsll.vi v8, v8, 8
 ; CHECK-NEXT:    vor.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv8i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -126,6 +178,12 @@ define <vscale x 16 x i16> @vp_bswap_nxv16i16(<vscale x 16 x i16> %va, <vscale x
 ; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
 ; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv16i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i16> %v
 }
@@ -138,6 +196,12 @@ define <vscale x 16 x i16> @vp_bswap_nxv16i16_unmasked(<vscale x 16 x i16> %va,
 ; CHECK-NEXT:    vsll.vi v8, v8, 8
 ; CHECK-NEXT:    vor.vv v8, v8, v12
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv16i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
@@ -154,6 +218,12 @@ define <vscale x 32 x i16> @vp_bswap_nxv32i16(<vscale x 32 x i16> %va, <vscale x
 ; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
 ; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv32i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 32 x i16> @llvm.vp.bswap.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i16> %v
 }
@@ -166,6 +236,12 @@ define <vscale x 32 x i16> @vp_bswap_nxv32i16_unmasked(<vscale x 32 x i16> %va,
 ; CHECK-NEXT:    vsll.vi v8, v8, 8
 ; CHECK-NEXT:    vor.vv v8, v8, v16
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv32i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x i16> @llvm.vp.bswap.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
@@ -206,6 +282,12 @@ define <vscale x 1 x i32> @vp_bswap_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1
 ; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv1i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i32> %v
 }
@@ -242,6 +324,12 @@ define <vscale x 1 x i32> @vp_bswap_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32
 ; RV64-NEXT:    vor.vv v8, v8, v10
 ; RV64-NEXT:    vor.vv v8, v8, v9
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv1i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -282,6 +370,12 @@ define <vscale x 2 x i32> @vp_bswap_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2
 ; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv2i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i32> %v
 }
@@ -318,6 +412,12 @@ define <vscale x 2 x i32> @vp_bswap_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32
 ; RV64-NEXT:    vor.vv v8, v8, v10
 ; RV64-NEXT:    vor.vv v8, v8, v9
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv2i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -358,6 +458,12 @@ define <vscale x 4 x i32> @vp_bswap_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4
 ; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv4i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i32> %v
 }
@@ -394,6 +500,12 @@ define <vscale x 4 x i32> @vp_bswap_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32
 ; RV64-NEXT:    vor.vv v8, v8, v12
 ; RV64-NEXT:    vor.vv v8, v8, v10
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv4i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -434,6 +546,12 @@ define <vscale x 8 x i32> @vp_bswap_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv8i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i32> %v
 }
@@ -470,6 +588,12 @@ define <vscale x 8 x i32> @vp_bswap_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32
 ; RV64-NEXT:    vor.vv v8, v8, v16
 ; RV64-NEXT:    vor.vv v8, v8, v12
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv8i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -510,6 +634,12 @@ define <vscale x 16 x i32> @vp_bswap_nxv16i32(<vscale x 16 x i32> %va, <vscale x
 ; RV64-NEXT:    vor.vv v8, v8, v24, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv16i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i32> %v
 }
@@ -546,6 +676,12 @@ define <vscale x 16 x i32> @vp_bswap_nxv16i32_unmasked(<vscale x 16 x i32> %va,
 ; RV64-NEXT:    vor.vv v8, v8, v24
 ; RV64-NEXT:    vor.vv v8, v8, v16
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv16i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
@@ -628,6 +764,12 @@ define <vscale x 1 x i64> @vp_bswap_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
 ; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv1i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i64> %v
 }
@@ -706,6 +848,12 @@ define <vscale x 1 x i64> @vp_bswap_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32
 ; RV64-NEXT:    vor.vv v8, v8, v10
 ; RV64-NEXT:    vor.vv v8, v9, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv1i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -788,6 +936,12 @@ define <vscale x 2 x i64> @vp_bswap_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
 ; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv2i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %v
 }
@@ -866,6 +1020,12 @@ define <vscale x 2 x i64> @vp_bswap_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32
 ; RV64-NEXT:    vor.vv v8, v8, v12
 ; RV64-NEXT:    vor.vv v8, v10, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv2i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -948,6 +1108,12 @@ define <vscale x 4 x i64> @vp_bswap_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv4i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i64> %v
 }
@@ -1026,6 +1192,12 @@ define <vscale x 4 x i64> @vp_bswap_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32
 ; RV64-NEXT:    vor.vv v8, v8, v16
 ; RV64-NEXT:    vor.vv v8, v12, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv4i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -1172,6 +1344,12 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv7i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 7 x i64> @llvm.vp.bswap.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x i64> %v
 }
@@ -1263,6 +1441,12 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32
 ; RV64-NEXT:    vor.vv v8, v8, v24
 ; RV64-NEXT:    vor.vv v8, v16, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv7i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x i64> @llvm.vp.bswap.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
@@ -1409,6 +1593,12 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv8i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i64> %v
 }
@@ -1500,6 +1690,12 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32
 ; RV64-NEXT:    vor.vv v8, v8, v24
 ; RV64-NEXT:    vor.vv v8, v16, v8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv8i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -1560,6 +1756,29 @@ define <vscale x 64 x i16> @vp_bswap_nxv64i16(<vscale x 64 x i16> %va, <vscale x
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv64i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vmv1r.v v24, v0
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    srli a2, a1, 1
+; CHECK-ZVBB-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-ZVBB-NEXT:    slli a1, a1, 2
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v16, v16, v0.t
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB32_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB32_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vmv1r.v v0, v24
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 64 x i16> @llvm.vp.bswap.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)
   ret <vscale x 64 x i16> %v
 }
@@ -1586,6 +1805,24 @@ define <vscale x 64 x i16> @vp_bswap_nxv64i16_unmasked(<vscale x 64 x i16> %va,
 ; CHECK-NEXT:    vsll.vi v8, v8, 8
 ; CHECK-NEXT:    vor.vv v8, v8, v24
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv64i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    slli a1, a1, 2
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v16, v16
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB33_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB33_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vrev8.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
   %v = call <vscale x 64 x i16> @llvm.vp.bswap.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)

diff  --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
index d7faa8ba9fe8d6..5ee4cb105da448 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -3,6 +3,10 @@
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb,+m -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb,+m -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
 
 declare <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
 
@@ -30,6 +34,12 @@ define <vscale x 1 x i8> @vp_ctlz_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i8> %v
 }
@@ -58,6 +68,12 @@ define <vscale x 1 x i8> @vp_ctlz_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zer
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
@@ -90,6 +106,12 @@ define <vscale x 2 x i8> @vp_ctlz_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i8> %v
 }
@@ -118,6 +140,12 @@ define <vscale x 2 x i8> @vp_ctlz_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zer
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
@@ -150,6 +178,12 @@ define <vscale x 4 x i8> @vp_ctlz_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i8> %v
 }
@@ -178,6 +212,12 @@ define <vscale x 4 x i8> @vp_ctlz_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zer
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
@@ -210,6 +250,12 @@ define <vscale x 8 x i8> @vp_ctlz_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i8> %v
 }
@@ -238,6 +284,12 @@ define <vscale x 8 x i8> @vp_ctlz_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zer
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
@@ -270,6 +322,12 @@ define <vscale x 16 x i8> @vp_ctlz_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16
 ; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i8> %v
 }
@@ -298,6 +356,12 @@ define <vscale x 16 x i8> @vp_ctlz_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32
 ; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
@@ -330,6 +394,12 @@ define <vscale x 32 x i8> @vp_ctlz_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32
 ; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8> %va, i1 false, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i8> %v
 }
@@ -358,6 +428,12 @@ define <vscale x 32 x i8> @vp_ctlz_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32
 ; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8> %va, i1 false, <vscale x 32 x i1> %m, i32 %evl)
@@ -390,6 +466,12 @@ define <vscale x 64 x i8> @vp_ctlz_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64
 ; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv64i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8> %va, i1 false, <vscale x 64 x i1> %m, i32 %evl)
   ret <vscale x 64 x i8> %v
 }
@@ -418,6 +500,12 @@ define <vscale x 64 x i8> @vp_ctlz_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32
 ; CHECK-NEXT:    vadd.vv v8, v8, v16
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv64i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
   %v = call <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8> %va, i1 false, <vscale x 64 x i1> %m, i32 %evl)
@@ -492,6 +580,12 @@ define <vscale x 1 x i16> @vp_ctlz_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i16> %v
 }
@@ -562,6 +656,12 @@ define <vscale x 1 x i16> @vp_ctlz_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
@@ -636,6 +736,12 @@ define <vscale x 2 x i16> @vp_ctlz_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i16> %v
 }
@@ -706,6 +812,12 @@ define <vscale x 2 x i16> @vp_ctlz_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
@@ -780,6 +892,12 @@ define <vscale x 4 x i16> @vp_ctlz_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i16> %v
 }
@@ -850,6 +968,12 @@ define <vscale x 4 x i16> @vp_ctlz_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
@@ -924,6 +1048,12 @@ define <vscale x 8 x i16> @vp_ctlz_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i16> %v
 }
@@ -994,6 +1124,12 @@ define <vscale x 8 x i16> @vp_ctlz_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
@@ -1068,6 +1204,12 @@ define <vscale x 16 x i16> @vp_ctlz_nxv16i16(<vscale x 16 x i16> %va, <vscale x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i16> %v
 }
@@ -1138,6 +1280,12 @@ define <vscale x 16 x i16> @vp_ctlz_nxv16i16_unmasked(<vscale x 16 x i16> %va, i
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
@@ -1212,6 +1360,12 @@ define <vscale x 32 x i16> @vp_ctlz_nxv32i16(<vscale x 32 x i16> %va, <vscale x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> %va, i1 false, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i16> %v
 }
@@ -1282,6 +1436,12 @@ define <vscale x 32 x i16> @vp_ctlz_nxv32i16_unmasked(<vscale x 32 x i16> %va, i
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> %va, i1 false, <vscale x 32 x i1> %m, i32 %evl)
@@ -1362,6 +1522,12 @@ define <vscale x 1 x i32> @vp_ctlz_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i32> %v
 }
@@ -1438,6 +1604,12 @@ define <vscale x 1 x i32> @vp_ctlz_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
@@ -1518,6 +1690,12 @@ define <vscale x 2 x i32> @vp_ctlz_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i32> %v
 }
@@ -1594,6 +1772,12 @@ define <vscale x 2 x i32> @vp_ctlz_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
@@ -1674,6 +1858,12 @@ define <vscale x 4 x i32> @vp_ctlz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i32> %v
 }
@@ -1750,6 +1940,12 @@ define <vscale x 4 x i32> @vp_ctlz_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
@@ -1830,6 +2026,12 @@ define <vscale x 8 x i32> @vp_ctlz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i32> %v
 }
@@ -1906,6 +2108,12 @@ define <vscale x 8 x i32> @vp_ctlz_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
@@ -1986,6 +2194,12 @@ define <vscale x 16 x i32> @vp_ctlz_nxv16i32(<vscale x 16 x i32> %va, <vscale x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i32> %v
 }
@@ -2062,6 +2276,12 @@ define <vscale x 16 x i32> @vp_ctlz_nxv16i32_unmasked(<vscale x 16 x i32> %va, i
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
@@ -2185,6 +2405,12 @@ define <vscale x 1 x i64> @vp_ctlz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i64> %v
 }
@@ -2304,6 +2530,12 @@ define <vscale x 1 x i64> @vp_ctlz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
@@ -2427,6 +2659,12 @@ define <vscale x 2 x i64> @vp_ctlz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %v
 }
@@ -2546,6 +2784,12 @@ define <vscale x 2 x i64> @vp_ctlz_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
@@ -2669,6 +2913,12 @@ define <vscale x 4 x i64> @vp_ctlz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i64> %v
 }
@@ -2788,6 +3038,12 @@ define <vscale x 4 x i64> @vp_ctlz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
@@ -2911,6 +3167,12 @@ define <vscale x 7 x i64> @vp_ctlz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv7i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 7 x i64> @llvm.vp.ctlz.nxv7i64(<vscale x 7 x i64> %va, i1 false, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x i64> %v
 }
@@ -3030,6 +3292,12 @@ define <vscale x 7 x i64> @vp_ctlz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv7i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x i64> @llvm.vp.ctlz.nxv7i64(<vscale x 7 x i64> %va, i1 false, <vscale x 7 x i1> %m, i32 %evl)
@@ -3153,6 +3421,12 @@ define <vscale x 8 x i64> @vp_ctlz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i64> %v
 }
@@ -3272,6 +3546,12 @@ define <vscale x 8 x i64> @vp_ctlz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
@@ -3614,6 +3894,28 @@ define <vscale x 16 x i64> @vp_ctlz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vmv1r.v v24, v0
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    srli a2, a1, 3
+; CHECK-ZVBB-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v16, v16, v0.t
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB46_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB46_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vmv1r.v v0, v24
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i64> %v
 }
@@ -3846,6 +4148,23 @@ define <vscale x 16 x i64> @vp_ctlz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i
 ; RV64-NEXT:    vmul.vx v8, v8, a6
 ; RV64-NEXT:    vsrl.vx v8, v8, a7
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v16, v16
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB47_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB47_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
@@ -3876,6 +4195,12 @@ define <vscale x 1 x i8> @vp_ctlz_zero_undef_nxv1i8(<vscale x 1 x i8> %va, <vsca
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i8> %v
 }
@@ -3904,6 +4229,12 @@ define <vscale x 1 x i8> @vp_ctlz_zero_undef_nxv1i8_unmasked(<vscale x 1 x i8> %
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
@@ -3935,6 +4266,12 @@ define <vscale x 2 x i8> @vp_ctlz_zero_undef_nxv2i8(<vscale x 2 x i8> %va, <vsca
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i8> %v
 }
@@ -3963,6 +4300,12 @@ define <vscale x 2 x i8> @vp_ctlz_zero_undef_nxv2i8_unmasked(<vscale x 2 x i8> %
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
@@ -3994,6 +4337,12 @@ define <vscale x 4 x i8> @vp_ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vsca
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i8> %v
 }
@@ -4022,6 +4371,12 @@ define <vscale x 4 x i8> @vp_ctlz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
@@ -4053,6 +4408,12 @@ define <vscale x 8 x i8> @vp_ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vsca
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i8> %v
 }
@@ -4081,6 +4442,12 @@ define <vscale x 8 x i8> @vp_ctlz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
@@ -4112,6 +4479,12 @@ define <vscale x 16 x i8> @vp_ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <v
 ; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i8> %v
 }
@@ -4140,6 +4513,12 @@ define <vscale x 16 x i8> @vp_ctlz_zero_undef_nxv16i8_unmasked(<vscale x 16 x i8
 ; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
@@ -4171,6 +4550,12 @@ define <vscale x 32 x i8> @vp_ctlz_zero_undef_nxv32i8(<vscale x 32 x i8> %va, <v
 ; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i8> %v
 }
@@ -4199,6 +4584,12 @@ define <vscale x 32 x i8> @vp_ctlz_zero_undef_nxv32i8_unmasked(<vscale x 32 x i8
 ; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
@@ -4230,6 +4621,12 @@ define <vscale x 64 x i8> @vp_ctlz_zero_undef_nxv64i8(<vscale x 64 x i8> %va, <v
 ; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv64i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8> %va, i1 true, <vscale x 64 x i1> %m, i32 %evl)
   ret <vscale x 64 x i8> %v
 }
@@ -4258,6 +4655,12 @@ define <vscale x 64 x i8> @vp_ctlz_zero_undef_nxv64i8_unmasked(<vscale x 64 x i8
 ; CHECK-NEXT:    vadd.vv v8, v8, v16
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv64i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
   %v = call <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8> %va, i1 true, <vscale x 64 x i1> %m, i32 %evl)
@@ -4331,6 +4734,12 @@ define <vscale x 1 x i16> @vp_ctlz_zero_undef_nxv1i16(<vscale x 1 x i16> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i16> %v
 }
@@ -4401,6 +4810,12 @@ define <vscale x 1 x i16> @vp_ctlz_zero_undef_nxv1i16_unmasked(<vscale x 1 x i16
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
@@ -4474,6 +4889,12 @@ define <vscale x 2 x i16> @vp_ctlz_zero_undef_nxv2i16(<vscale x 2 x i16> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i16> %v
 }
@@ -4544,6 +4965,12 @@ define <vscale x 2 x i16> @vp_ctlz_zero_undef_nxv2i16_unmasked(<vscale x 2 x i16
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
@@ -4617,6 +5044,12 @@ define <vscale x 4 x i16> @vp_ctlz_zero_undef_nxv4i16(<vscale x 4 x i16> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i16> %v
 }
@@ -4687,6 +5120,12 @@ define <vscale x 4 x i16> @vp_ctlz_zero_undef_nxv4i16_unmasked(<vscale x 4 x i16
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
@@ -4760,6 +5199,12 @@ define <vscale x 8 x i16> @vp_ctlz_zero_undef_nxv8i16(<vscale x 8 x i16> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i16> %v
 }
@@ -4830,6 +5275,12 @@ define <vscale x 8 x i16> @vp_ctlz_zero_undef_nxv8i16_unmasked(<vscale x 8 x i16
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
@@ -4903,6 +5354,12 @@ define <vscale x 16 x i16> @vp_ctlz_zero_undef_nxv16i16(<vscale x 16 x i16> %va,
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i16> %v
 }
@@ -4973,6 +5430,12 @@ define <vscale x 16 x i16> @vp_ctlz_zero_undef_nxv16i16_unmasked(<vscale x 16 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
@@ -5046,6 +5509,12 @@ define <vscale x 32 x i16> @vp_ctlz_zero_undef_nxv32i16(<vscale x 32 x i16> %va,
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i16> %v
 }
@@ -5116,6 +5585,12 @@ define <vscale x 32 x i16> @vp_ctlz_zero_undef_nxv32i16_unmasked(<vscale x 32 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
@@ -5195,6 +5670,12 @@ define <vscale x 1 x i32> @vp_ctlz_zero_undef_nxv1i32(<vscale x 1 x i32> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i32> %v
 }
@@ -5271,6 +5752,12 @@ define <vscale x 1 x i32> @vp_ctlz_zero_undef_nxv1i32_unmasked(<vscale x 1 x i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
@@ -5350,6 +5837,12 @@ define <vscale x 2 x i32> @vp_ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i32> %v
 }
@@ -5426,6 +5919,12 @@ define <vscale x 2 x i32> @vp_ctlz_zero_undef_nxv2i32_unmasked(<vscale x 2 x i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
@@ -5505,6 +6004,12 @@ define <vscale x 4 x i32> @vp_ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i32> %v
 }
@@ -5581,6 +6086,12 @@ define <vscale x 4 x i32> @vp_ctlz_zero_undef_nxv4i32_unmasked(<vscale x 4 x i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
@@ -5660,6 +6171,12 @@ define <vscale x 8 x i32> @vp_ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i32> %v
 }
@@ -5736,6 +6253,12 @@ define <vscale x 8 x i32> @vp_ctlz_zero_undef_nxv8i32_unmasked(<vscale x 8 x i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
@@ -5815,6 +6338,12 @@ define <vscale x 16 x i32> @vp_ctlz_zero_undef_nxv16i32(<vscale x 16 x i32> %va,
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i32> %v
 }
@@ -5891,6 +6420,12 @@ define <vscale x 16 x i32> @vp_ctlz_zero_undef_nxv16i32_unmasked(<vscale x 16 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
@@ -6013,6 +6548,12 @@ define <vscale x 1 x i64> @vp_ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va, <v
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i64> %v
 }
@@ -6132,6 +6673,12 @@ define <vscale x 1 x i64> @vp_ctlz_zero_undef_nxv1i64_unmasked(<vscale x 1 x i64
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
@@ -6254,6 +6801,12 @@ define <vscale x 2 x i64> @vp_ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va, <v
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %v
 }
@@ -6373,6 +6926,12 @@ define <vscale x 2 x i64> @vp_ctlz_zero_undef_nxv2i64_unmasked(<vscale x 2 x i64
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
@@ -6495,6 +7054,12 @@ define <vscale x 4 x i64> @vp_ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va, <v
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i64> %v
 }
@@ -6614,6 +7179,12 @@ define <vscale x 4 x i64> @vp_ctlz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
@@ -6736,6 +7307,12 @@ define <vscale x 7 x i64> @vp_ctlz_zero_undef_nxv7i64(<vscale x 7 x i64> %va, <v
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv7i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 7 x i64> @llvm.vp.ctlz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x i64> %v
 }
@@ -6855,6 +7432,12 @@ define <vscale x 7 x i64> @vp_ctlz_zero_undef_nxv7i64_unmasked(<vscale x 7 x i64
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x i64> @llvm.vp.ctlz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
@@ -6977,6 +7560,12 @@ define <vscale x 8 x i64> @vp_ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va, <v
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i64> %v
 }
@@ -7096,6 +7685,12 @@ define <vscale x 8 x i64> @vp_ctlz_zero_undef_nxv8i64_unmasked(<vscale x 8 x i64
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
@@ -7436,6 +8031,28 @@ define <vscale x 16 x i64> @vp_ctlz_zero_undef_nxv16i64(<vscale x 16 x i64> %va,
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vmv1r.v v24, v0
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    srli a2, a1, 3
+; CHECK-ZVBB-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v16, v16, v0.t
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB94_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB94_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vmv1r.v v0, v24
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i64> %v
 }
@@ -7668,6 +8285,23 @@ define <vscale x 16 x i64> @vp_ctlz_zero_undef_nxv16i64_unmasked(<vscale x 16 x
 ; RV64-NEXT:    vmul.vx v8, v8, a6
 ; RV64-NEXT:    vsrl.vx v8, v8, a7
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v16, v16
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB95_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB95_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)

diff  --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
index 6d24b1615ba814..f02d5d4818a8c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
@@ -3,6 +3,10 @@
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb,+m -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb,+m -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
 
 declare <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
 
@@ -23,6 +27,12 @@ define <vscale x 1 x i8> @vp_ctpop_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i8> %v
 }
@@ -44,6 +54,12 @@ define <vscale x 1 x i8> @vp_ctpop_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 ze
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -69,6 +85,12 @@ define <vscale x 2 x i8> @vp_ctpop_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i8> %v
 }
@@ -90,6 +112,12 @@ define <vscale x 2 x i8> @vp_ctpop_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 ze
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -115,6 +143,12 @@ define <vscale x 4 x i8> @vp_ctpop_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i8> %v
 }
@@ -136,6 +170,12 @@ define <vscale x 4 x i8> @vp_ctpop_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 ze
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -161,6 +201,12 @@ define <vscale x 8 x i8> @vp_ctpop_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i8> %v
 }
@@ -182,6 +228,12 @@ define <vscale x 8 x i8> @vp_ctpop_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 ze
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -207,6 +259,12 @@ define <vscale x 16 x i8> @vp_ctpop_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16
 ; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i8> %v
 }
@@ -228,6 +286,12 @@ define <vscale x 16 x i8> @vp_ctpop_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32
 ; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
@@ -253,6 +317,12 @@ define <vscale x 32 x i8> @vp_ctpop_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32
 ; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 32 x i8> @llvm.vp.ctpop.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i8> %v
 }
@@ -274,6 +344,12 @@ define <vscale x 32 x i8> @vp_ctpop_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32
 ; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x i8> @llvm.vp.ctpop.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
@@ -299,6 +375,12 @@ define <vscale x 64 x i8> @vp_ctpop_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64
 ; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv64i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 64 x i8> @llvm.vp.ctpop.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
   ret <vscale x 64 x i8> %v
 }
@@ -320,6 +402,12 @@ define <vscale x 64 x i8> @vp_ctpop_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32
 ; CHECK-NEXT:    vadd.vv v8, v8, v16
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv64i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
   %v = call <vscale x 64 x i8> @llvm.vp.ctpop.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
@@ -376,6 +464,12 @@ define <vscale x 1 x i16> @vp_ctpop_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i16> %v
 }
@@ -428,6 +522,12 @@ define <vscale x 1 x i16> @vp_ctpop_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -484,6 +584,12 @@ define <vscale x 2 x i16> @vp_ctpop_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i16> %v
 }
@@ -536,6 +642,12 @@ define <vscale x 2 x i16> @vp_ctpop_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -592,6 +704,12 @@ define <vscale x 4 x i16> @vp_ctpop_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i16> %v
 }
@@ -644,6 +762,12 @@ define <vscale x 4 x i16> @vp_ctpop_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -700,6 +824,12 @@ define <vscale x 8 x i16> @vp_ctpop_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i16> %v
 }
@@ -752,6 +882,12 @@ define <vscale x 8 x i16> @vp_ctpop_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -808,6 +944,12 @@ define <vscale x 16 x i16> @vp_ctpop_nxv16i16(<vscale x 16 x i16> %va, <vscale x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i16> %v
 }
@@ -860,6 +1002,12 @@ define <vscale x 16 x i16> @vp_ctpop_nxv16i16_unmasked(<vscale x 16 x i16> %va,
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
@@ -916,6 +1064,12 @@ define <vscale x 32 x i16> @vp_ctpop_nxv32i16(<vscale x 32 x i16> %va, <vscale x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 32 x i16> @llvm.vp.ctpop.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i16> %v
 }
@@ -968,6 +1122,12 @@ define <vscale x 32 x i16> @vp_ctpop_nxv32i16_unmasked(<vscale x 32 x i16> %va,
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x i16> @llvm.vp.ctpop.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
@@ -1026,6 +1186,12 @@ define <vscale x 1 x i32> @vp_ctpop_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i32> %v
 }
@@ -1080,6 +1246,12 @@ define <vscale x 1 x i32> @vp_ctpop_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -1138,6 +1310,12 @@ define <vscale x 2 x i32> @vp_ctpop_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i32> %v
 }
@@ -1192,6 +1370,12 @@ define <vscale x 2 x i32> @vp_ctpop_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -1250,6 +1434,12 @@ define <vscale x 4 x i32> @vp_ctpop_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i32> %v
 }
@@ -1304,6 +1494,12 @@ define <vscale x 4 x i32> @vp_ctpop_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -1362,6 +1558,12 @@ define <vscale x 8 x i32> @vp_ctpop_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i32> %v
 }
@@ -1416,6 +1618,12 @@ define <vscale x 8 x i32> @vp_ctpop_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -1474,6 +1682,12 @@ define <vscale x 16 x i32> @vp_ctpop_nxv16i32(<vscale x 16 x i32> %va, <vscale x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i32> %v
 }
@@ -1528,6 +1742,12 @@ define <vscale x 16 x i32> @vp_ctpop_nxv16i32_unmasked(<vscale x 16 x i32> %va,
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
@@ -1623,6 +1843,12 @@ define <vscale x 1 x i64> @vp_ctpop_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i64> %v
 }
@@ -1714,6 +1940,12 @@ define <vscale x 1 x i64> @vp_ctpop_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -1809,6 +2041,12 @@ define <vscale x 2 x i64> @vp_ctpop_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %v
 }
@@ -1900,6 +2138,12 @@ define <vscale x 2 x i64> @vp_ctpop_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -1995,6 +2239,12 @@ define <vscale x 4 x i64> @vp_ctpop_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i64> %v
 }
@@ -2086,6 +2336,12 @@ define <vscale x 4 x i64> @vp_ctpop_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -2181,6 +2437,12 @@ define <vscale x 7 x i64> @vp_ctpop_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv7i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x i64> %v
 }
@@ -2272,6 +2534,12 @@ define <vscale x 7 x i64> @vp_ctpop_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv7i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
@@ -2367,6 +2635,12 @@ define <vscale x 8 x i64> @vp_ctpop_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i64> %v
 }
@@ -2458,6 +2732,12 @@ define <vscale x 8 x i64> @vp_ctpop_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -2771,6 +3051,28 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vmv1r.v v24, v0
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    srli a2, a1, 3
+; CHECK-ZVBB-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v16, v16, v0.t
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB46_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB46_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vmv1r.v v0, v24
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i64> %v
 }
@@ -2963,6 +3265,23 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va,
 ; RV64-NEXT:    vmul.vx v16, v16, a5
 ; RV64-NEXT:    vsrl.vx v16, v16, a6
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v16, v16
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB47_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB47_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 %evl)

diff  --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
index ce0ce9d588fb4b..38647bb21f7f60 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -3,6 +3,10 @@
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb,+m -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb,+m -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
 
 declare <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
 
@@ -27,6 +31,12 @@ define <vscale x 1 x i8> @vp_cttz_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i8> %v
 }
@@ -52,6 +62,12 @@ define <vscale x 1 x i8> @vp_cttz_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zer
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
@@ -81,6 +97,12 @@ define <vscale x 2 x i8> @vp_cttz_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv2i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i8> %v
 }
@@ -106,6 +128,12 @@ define <vscale x 2 x i8> @vp_cttz_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zer
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv2i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
@@ -135,6 +163,12 @@ define <vscale x 4 x i8> @vp_cttz_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv4i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i8> %v
 }
@@ -160,6 +194,12 @@ define <vscale x 4 x i8> @vp_cttz_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zer
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv4i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
@@ -189,6 +229,12 @@ define <vscale x 8 x i8> @vp_cttz_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv8i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i8> %v
 }
@@ -214,6 +260,12 @@ define <vscale x 8 x i8> @vp_cttz_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zer
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv8i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
@@ -243,6 +295,12 @@ define <vscale x 16 x i8> @vp_cttz_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16
 ; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv16i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i8> %v
 }
@@ -268,6 +326,12 @@ define <vscale x 16 x i8> @vp_cttz_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32
 ; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv16i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
@@ -297,6 +361,12 @@ define <vscale x 32 x i8> @vp_cttz_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32
 ; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv32i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 false, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i8> %v
 }
@@ -322,6 +392,12 @@ define <vscale x 32 x i8> @vp_cttz_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32
 ; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv32i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 false, <vscale x 32 x i1> %m, i32 %evl)
@@ -351,6 +427,12 @@ define <vscale x 64 x i8> @vp_cttz_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64
 ; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv64i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 false, <vscale x 64 x i1> %m, i32 %evl)
   ret <vscale x 64 x i8> %v
 }
@@ -376,6 +458,12 @@ define <vscale x 64 x i8> @vp_cttz_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32
 ; CHECK-NEXT:    vadd.vv v8, v8, v16
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv64i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
   %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 false, <vscale x 64 x i1> %m, i32 %evl)
@@ -440,6 +528,12 @@ define <vscale x 1 x i16> @vp_cttz_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i16> %v
 }
@@ -500,6 +594,12 @@ define <vscale x 1 x i16> @vp_cttz_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
@@ -564,6 +664,12 @@ define <vscale x 2 x i16> @vp_cttz_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i16> %v
 }
@@ -624,6 +730,12 @@ define <vscale x 2 x i16> @vp_cttz_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
@@ -688,6 +800,12 @@ define <vscale x 4 x i16> @vp_cttz_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i16> %v
 }
@@ -748,6 +866,12 @@ define <vscale x 4 x i16> @vp_cttz_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
@@ -812,6 +936,12 @@ define <vscale x 8 x i16> @vp_cttz_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i16> %v
 }
@@ -872,6 +1002,12 @@ define <vscale x 8 x i16> @vp_cttz_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
@@ -936,6 +1072,12 @@ define <vscale x 16 x i16> @vp_cttz_nxv16i16(<vscale x 16 x i16> %va, <vscale x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i16> %v
 }
@@ -996,6 +1138,12 @@ define <vscale x 16 x i16> @vp_cttz_nxv16i16_unmasked(<vscale x 16 x i16> %va, i
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
@@ -1060,6 +1208,12 @@ define <vscale x 32 x i16> @vp_cttz_nxv32i16(<vscale x 32 x i16> %va, <vscale x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 false, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i16> %v
 }
@@ -1120,6 +1274,12 @@ define <vscale x 32 x i16> @vp_cttz_nxv32i16_unmasked(<vscale x 32 x i16> %va, i
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 false, <vscale x 32 x i1> %m, i32 %evl)
@@ -1186,6 +1346,12 @@ define <vscale x 1 x i32> @vp_cttz_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i32> %v
 }
@@ -1248,6 +1414,12 @@ define <vscale x 1 x i32> @vp_cttz_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
@@ -1314,6 +1486,12 @@ define <vscale x 2 x i32> @vp_cttz_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i32> %v
 }
@@ -1376,6 +1554,12 @@ define <vscale x 2 x i32> @vp_cttz_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
@@ -1442,6 +1626,12 @@ define <vscale x 4 x i32> @vp_cttz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv4i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i32> %v
 }
@@ -1504,6 +1694,12 @@ define <vscale x 4 x i32> @vp_cttz_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv4i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
@@ -1570,6 +1766,12 @@ define <vscale x 8 x i32> @vp_cttz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i32> %v
 }
@@ -1632,6 +1834,12 @@ define <vscale x 8 x i32> @vp_cttz_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
@@ -1698,6 +1906,12 @@ define <vscale x 16 x i32> @vp_cttz_nxv16i32(<vscale x 16 x i32> %va, <vscale x
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i32> %v
 }
@@ -1760,6 +1974,12 @@ define <vscale x 16 x i32> @vp_cttz_nxv16i32_unmasked(<vscale x 16 x i32> %va, i
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
@@ -1863,6 +2083,12 @@ define <vscale x 1 x i64> @vp_cttz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i64> %v
 }
@@ -1962,6 +2188,12 @@ define <vscale x 1 x i64> @vp_cttz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
@@ -2065,6 +2297,12 @@ define <vscale x 2 x i64> @vp_cttz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv2i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %v
 }
@@ -2164,6 +2402,12 @@ define <vscale x 2 x i64> @vp_cttz_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv2i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
@@ -2267,6 +2511,12 @@ define <vscale x 4 x i64> @vp_cttz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv4i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i64> %v
 }
@@ -2366,6 +2616,12 @@ define <vscale x 4 x i64> @vp_cttz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv4i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
@@ -2469,6 +2725,12 @@ define <vscale x 7 x i64> @vp_cttz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv7i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 false, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x i64> %v
 }
@@ -2568,6 +2830,12 @@ define <vscale x 7 x i64> @vp_cttz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv7i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 false, <vscale x 7 x i1> %m, i32 %evl)
@@ -2671,6 +2939,12 @@ define <vscale x 8 x i64> @vp_cttz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv8i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i64> %v
 }
@@ -2770,6 +3044,12 @@ define <vscale x 8 x i64> @vp_cttz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv8i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
@@ -3057,6 +3337,28 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv16i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vmv1r.v v24, v0
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    srli a2, a1, 3
+; CHECK-ZVBB-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v16, v16, v0.t
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB46_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB46_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vmv1r.v v0, v24
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i64> %v
 }
@@ -3249,6 +3551,23 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i
 ; RV64-NEXT:    vmul.vx v8, v8, a6
 ; RV64-NEXT:    vsrl.vx v8, v8, a7
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv16i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v16, v16
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB47_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB47_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
@@ -3276,6 +3595,12 @@ define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va, <vsca
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i8> %v
 }
@@ -3301,6 +3626,12 @@ define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8_unmasked(<vscale x 1 x i8> %
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
@@ -3329,6 +3660,12 @@ define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va, <vsca
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i8> %v
 }
@@ -3354,6 +3691,12 @@ define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8_unmasked(<vscale x 2 x i8> %
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
@@ -3382,6 +3725,12 @@ define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vsca
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i8> %v
 }
@@ -3407,6 +3756,12 @@ define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
@@ -3435,6 +3790,12 @@ define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vsca
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i8> %v
 }
@@ -3460,6 +3821,12 @@ define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
@@ -3488,6 +3855,12 @@ define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <v
 ; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i8> %v
 }
@@ -3513,6 +3886,12 @@ define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8_unmasked(<vscale x 16 x i8
 ; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
@@ -3541,6 +3920,12 @@ define <vscale x 32 x i8> @vp_cttz_zero_undef_nxv32i8(<vscale x 32 x i8> %va, <v
 ; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i8> %v
 }
@@ -3566,6 +3951,12 @@ define <vscale x 32 x i8> @vp_cttz_zero_undef_nxv32i8_unmasked(<vscale x 32 x i8
 ; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
@@ -3594,6 +3985,12 @@ define <vscale x 64 x i8> @vp_cttz_zero_undef_nxv64i8(<vscale x 64 x i8> %va, <v
 ; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv64i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 true, <vscale x 64 x i1> %m, i32 %evl)
   ret <vscale x 64 x i8> %v
 }
@@ -3619,6 +4016,12 @@ define <vscale x 64 x i8> @vp_cttz_zero_undef_nxv64i8_unmasked(<vscale x 64 x i8
 ; CHECK-NEXT:    vadd.vv v8, v8, v16
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv64i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
   %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 true, <vscale x 64 x i1> %m, i32 %evl)
@@ -3682,6 +4085,12 @@ define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i16> %v
 }
@@ -3742,6 +4151,12 @@ define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16_unmasked(<vscale x 1 x i16
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
@@ -3805,6 +4220,12 @@ define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i16> %v
 }
@@ -3865,6 +4286,12 @@ define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16_unmasked(<vscale x 2 x i16
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
@@ -3928,6 +4355,12 @@ define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i16> %v
 }
@@ -3988,6 +4421,12 @@ define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16_unmasked(<vscale x 4 x i16
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
@@ -4051,6 +4490,12 @@ define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i16> %v
 }
@@ -4111,6 +4556,12 @@ define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16_unmasked(<vscale x 8 x i16
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
@@ -4174,6 +4625,12 @@ define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va,
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i16> %v
 }
@@ -4234,6 +4691,12 @@ define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16_unmasked(<vscale x 16 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
@@ -4297,6 +4760,12 @@ define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va,
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i16> %v
 }
@@ -4357,6 +4826,12 @@ define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16_unmasked(<vscale x 32 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 8
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
@@ -4422,6 +4897,12 @@ define <vscale x 1 x i32> @vp_cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i32> %v
 }
@@ -4484,6 +4965,12 @@ define <vscale x 1 x i32> @vp_cttz_zero_undef_nxv1i32_unmasked(<vscale x 1 x i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
@@ -4549,6 +5036,12 @@ define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i32> %v
 }
@@ -4611,6 +5104,12 @@ define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32_unmasked(<vscale x 2 x i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
@@ -4676,6 +5175,12 @@ define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i32> %v
 }
@@ -4738,6 +5243,12 @@ define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32_unmasked(<vscale x 4 x i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
@@ -4803,6 +5314,12 @@ define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va, <v
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i32> %v
 }
@@ -4865,6 +5382,12 @@ define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32_unmasked(<vscale x 8 x i32
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
@@ -4930,6 +5453,12 @@ define <vscale x 16 x i32> @vp_cttz_zero_undef_nxv16i32(<vscale x 16 x i32> %va,
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i32> %v
 }
@@ -4992,6 +5521,12 @@ define <vscale x 16 x i32> @vp_cttz_zero_undef_nxv16i32_unmasked(<vscale x 16 x
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 24
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
@@ -5094,6 +5629,12 @@ define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va, <v
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i64> %v
 }
@@ -5193,6 +5734,12 @@ define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64_unmasked(<vscale x 1 x i64
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
@@ -5295,6 +5842,12 @@ define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va, <v
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %v
 }
@@ -5394,6 +5947,12 @@ define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64_unmasked(<vscale x 2 x i64
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
@@ -5496,6 +6055,12 @@ define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va, <v
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i64> %v
 }
@@ -5595,6 +6160,12 @@ define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
@@ -5697,6 +6268,12 @@ define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64(<vscale x 7 x i64> %va, <v
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x i64> %v
 }
@@ -5796,6 +6373,12 @@ define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64_unmasked(<vscale x 7 x i64
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
@@ -5898,6 +6481,12 @@ define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va, <v
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i64> %v
 }
@@ -5997,6 +6586,12 @@ define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64_unmasked(<vscale x 8 x i64
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
@@ -6282,6 +6877,28 @@ define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64(<vscale x 16 x i64> %va,
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vmv1r.v v24, v0
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    srli a2, a1, 3
+; CHECK-ZVBB-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v16, v16, v0.t
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB94_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB94_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vmv1r.v v0, v24
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i64> %v
 }
@@ -6474,6 +7091,23 @@ define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64_unmasked(<vscale x 16 x
 ; RV64-NEXT:    vmul.vx v8, v8, a6
 ; RV64-NEXT:    vsrl.vx v8, v8, a7
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v16, v16
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB95_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB95_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)