[llvm] 768740e - [RISCV] Lower unary zvbb ops for fixed vectors
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 8 01:47:08 PDT 2023
Author: Luke Lau
Date: 2023-08-08T09:46:57+01:00
New Revision: 768740ef7727fe892de269092509c55c0723b05f
URL: https://github.com/llvm/llvm-project/commit/768740ef7727fe892de269092509c55c0723b05f
DIFF: https://github.com/llvm/llvm-project/commit/768740ef7727fe892de269092509c55c0723b05f.diff
LOG: [RISCV] Lower unary zvbb ops for fixed vectors
This reuses the same strategy as other fixed-vector ops, i.e. custom-lowering
to a scalable *_vl SD node.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D157294
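As a rough sketch of the effect (mirroring the bswap tests below; the function
name is illustrative and the exact register choices may differ), a fixed-vector
bswap now selects a single vrev8.v when compiled with
-mattr=+v,+experimental-zvbb instead of the shift-and-or expansion:

  ; llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128
  declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)

  define <8 x i16> @bswap_example(<8 x i16> %v) {
    %r = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v)
    ret <8 x i16> %r
  }

  ; With zvbb enabled this lowers to roughly:
  ;   vsetivli zero, 8, e16, m1, ta, ma
  ;   vrev8.v  v8, v8
  ;   ret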
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 25a1dccd7b75a4..cfd45d6e6aa5c7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1058,13 +1058,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(IntegerVPOps, VT, Custom);
- // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
- // range of f32.
- EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
- if (isTypeLegal(FloatVT))
- setOperationAction(
- {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
- Custom);
+ if (Subtarget.hasStdExtZvbb()) {
+ setOperationAction({ISD::BITREVERSE, ISD::BSWAP, ISD::CTLZ,
+ ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ,
+ ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
+ VT, Custom);
+ } else {
+ // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
+ // range of f32.
+ EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ if (isTypeLegal(FloatVT))
+ setOperationAction(
+ {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+ Custom);
+ }
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -4880,6 +4887,11 @@ static unsigned getRISCVVLOp(SDValue Op) {
OP_CASE(SHL)
OP_CASE(SRA)
OP_CASE(SRL)
+ OP_CASE(BSWAP)
+ OP_CASE(CTTZ)
+ OP_CASE(CTLZ)
+ OP_CASE(CTPOP)
+ OP_CASE(BITREVERSE)
OP_CASE(SADDSAT)
OP_CASE(UADDSAT)
OP_CASE(SSUBSAT)
@@ -4927,8 +4939,10 @@ static unsigned getRISCVVLOp(SDValue Op) {
VP_CASE(CTLZ) // VP_CTLZ
VP_CASE(CTTZ) // VP_CTTZ
VP_CASE(CTPOP) // VP_CTPOP
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::VP_CTLZ_ZERO_UNDEF:
return RISCVISD::CTLZ_VL;
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::VP_CTTZ_ZERO_UNDEF:
return RISCVISD::CTTZ_VL;
case ISD::FMA:
@@ -5156,6 +5170,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return LowerIS_FPCLASS(Op, DAG);
case ISD::BITREVERSE: {
MVT VT = Op.getSimpleValueType();
+ if (VT.isFixedLengthVector()) {
+ assert(Subtarget.hasStdExtZvbb());
+ return lowerToScalableOp(Op, DAG);
+ }
SDLoc DL(Op);
assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
@@ -5668,6 +5686,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::SREM:
case ISD::UDIV:
case ISD::UREM:
+ case ISD::BSWAP:
+ case ISD::CTPOP:
return lowerToScalableOp(Op, DAG);
case ISD::SHL:
case ISD::SRA:
@@ -5702,7 +5722,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerABS(Op, DAG);
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
+ if (Subtarget.hasStdExtZvbb())
+ return lowerToScalableOp(Op, DAG);
+ assert(Op.getOpcode() != ISD::CTTZ);
return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
case ISD::VSELECT:
return lowerFixedLengthVectorSelectToRVV(Op, DAG);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
index 12a9c84b4e2b81..f1b00e39864004 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64,LMULMAX2-RV64
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
define void @bitreverse_v8i16(ptr %x, ptr %y) {
; RV32-LABEL: bitreverse_v8i16:
@@ -66,6 +68,14 @@ define void @bitreverse_v8i16(ptr %x, ptr %y) {
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: ret
+;
+; ZVBB-LABEL: bitreverse_v8i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vbrev.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = load <8 x i16>, ptr %y
%c = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
@@ -152,6 +162,14 @@ define void @bitreverse_v4i32(ptr %x, ptr %y) {
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
+;
+; ZVBB-LABEL: bitreverse_v4i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vbrev.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = load <4 x i32>, ptr %y
%c = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
@@ -291,6 +309,14 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) {
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
+;
+; ZVBB-LABEL: bitreverse_v2i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vbrev.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = load <2 x i64>, ptr %y
%c = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
@@ -465,6 +491,14 @@ define void @bitreverse_v16i16(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse16.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
+;
+; ZVBB-LABEL: bitreverse_v16i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vbrev.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
@@ -683,6 +717,14 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse32.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
+;
+; ZVBB-LABEL: bitreverse_v8i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vbrev.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
@@ -1033,6 +1075,14 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a1)
; LMULMAX1-RV64-NEXT: ret
+;
+; ZVBB-LABEL: bitreverse_v4i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vbrev.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
index 9fc1d680c821c0..e393fef62a2510 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2-RV64
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
define void @bswap_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: bswap_v8i16:
@@ -14,6 +16,14 @@ define void @bswap_v8i16(ptr %x, ptr %y) {
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: bswap_v8i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vrev8.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = load <8 x i16>, ptr %y
%c = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
@@ -58,6 +68,14 @@ define void @bswap_v4i32(ptr %x, ptr %y) {
; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
+;
+; ZVBB-LABEL: bswap_v4i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vrev8.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = load <4 x i32>, ptr %y
%c = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
@@ -140,6 +158,14 @@ define void @bswap_v2i64(ptr %x, ptr %y) {
; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
+;
+; ZVBB-LABEL: bswap_v2i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vrev8.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = load <2 x i64>, ptr %y
%c = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
@@ -200,6 +226,14 @@ define void @bswap_v16i16(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse16.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
+;
+; ZVBB-LABEL: bswap_v16i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vrev8.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a)
@@ -304,6 +338,14 @@ define void @bswap_v8i32(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse32.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
+;
+; ZVBB-LABEL: bswap_v8i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vrev8.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a)
@@ -510,6 +552,14 @@ define void @bswap_v4i64(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
+;
+; ZVBB-LABEL: bswap_v4i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vrev8.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index 69a71c2e458185..f1a87318d25dc4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -11,6 +11,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
define void @ctlz_v16i8(ptr %x, ptr %y) nounwind {
; CHECK-LABEL: ctlz_v16i8:
@@ -54,6 +56,14 @@ define void @ctlz_v16i8(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse8.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_v16i8:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; ZVBB-NEXT: vle8.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse8.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = load <16 x i8>, ptr %y
%c = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
@@ -267,6 +277,14 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse16.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_v8i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = load <8 x i16>, ptr %y
%c = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
@@ -423,6 +441,14 @@ define void @ctlz_v4i32(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_v4i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = load <4 x i32>, ptr %y
%c = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
@@ -621,6 +647,14 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse64.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_v2i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = load <2 x i64>, ptr %y
%c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
@@ -719,6 +753,15 @@ define void @ctlz_v32i8(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse8.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_v32i8:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: li a1, 32
+; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; ZVBB-NEXT: vle8.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse8.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%c = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false)
@@ -928,6 +971,14 @@ define void @ctlz_v16i16(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse16.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_v16i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false)
@@ -1086,6 +1137,14 @@ define void @ctlz_v8i32(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_v8i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false)
@@ -1284,6 +1343,14 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vminu.vx v8, v8, a1
; LMULMAX8-NEXT: vse64.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_v4i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
@@ -1332,6 +1399,14 @@ define void @ctlz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vrsub.vx v8, v10, a1
; LMULMAX8-NEXT: vse8.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_zero_undef_v16i8:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; ZVBB-NEXT: vle8.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse8.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = load <16 x i8>, ptr %y
%c = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true)
@@ -1534,6 +1609,14 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vrsub.vx v8, v8, a1
; LMULMAX8-NEXT: vse16.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_zero_undef_v8i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = load <8 x i16>, ptr %y
%c = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true)
@@ -1679,6 +1762,14 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vrsub.vx v8, v8, a1
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_zero_undef_v4i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = load <4 x i32>, ptr %y
%c = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true)
@@ -1864,6 +1955,14 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vrsub.vx v8, v8, a1
; LMULMAX8-NEXT: vse64.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_zero_undef_v2i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = load <2 x i64>, ptr %y
%c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
@@ -1959,6 +2058,15 @@ define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vrsub.vx v8, v12, a1
; LMULMAX8-NEXT: vse8.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_zero_undef_v32i8:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: li a1, 32
+; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; ZVBB-NEXT: vle8.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse8.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%c = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true)
@@ -2165,6 +2273,14 @@ define void @ctlz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vrsub.vx v8, v8, a1
; LMULMAX8-NEXT: vse16.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_zero_undef_v16i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true)
@@ -2312,6 +2428,14 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vrsub.vx v8, v8, a1
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_zero_undef_v8i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true)
@@ -2497,6 +2621,14 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vrsub.vx v8, v8, a1
; LMULMAX8-NEXT: vse64.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: ctlz_zero_undef_v4i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vclz.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index c5ed48ffdffe96..60af2188e754fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
define void @ctpop_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v16i8:
@@ -23,6 +25,14 @@ define void @ctpop_v16i8(ptr %x, ptr %y) {
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: ctpop_v16i8:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; ZVBB-NEXT: vle8.v v8, (a0)
+; ZVBB-NEXT: vcpop.v v8, v8
+; ZVBB-NEXT: vse8.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = load <16 x i8>, ptr %y
%c = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
@@ -135,6 +145,14 @@ define void @ctpop_v8i16(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
+;
+; ZVBB-LABEL: ctpop_v8i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vcpop.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = load <8 x i16>, ptr %y
%c = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
@@ -251,6 +269,14 @@ define void @ctpop_v4i32(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
+;
+; ZVBB-LABEL: ctpop_v4i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vcpop.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = load <4 x i32>, ptr %y
%c = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
@@ -411,6 +437,14 @@ define void @ctpop_v2i64(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
+;
+; ZVBB-LABEL: ctpop_v2i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vcpop.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = load <2 x i64>, ptr %y
%c = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
@@ -471,6 +505,15 @@ define void @ctpop_v32i8(ptr %x, ptr %y) {
; LMULMAX1-NEXT: vse8.v v9, (a0)
; LMULMAX1-NEXT: vse8.v v8, (a1)
; LMULMAX1-NEXT: ret
+;
+; ZVBB-LABEL: ctpop_v32i8:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: li a1, 32
+; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; ZVBB-NEXT: vle8.v v8, (a0)
+; ZVBB-NEXT: vcpop.v v8, v8
+; ZVBB-NEXT: vse8.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%c = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
@@ -613,6 +656,14 @@ define void @ctpop_v16i16(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse16.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
+;
+; ZVBB-LABEL: ctpop_v16i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vcpop.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
@@ -759,6 +810,14 @@ define void @ctpop_v8i32(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse32.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
+;
+; ZVBB-LABEL: ctpop_v8i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vcpop.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
@@ -949,6 +1008,14 @@ define void @ctpop_v4i64(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
+;
+; ZVBB-LABEL: ctpop_v4i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vcpop.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 5f48a274df3481..de89cb36373fee 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -11,6 +11,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
; CHECK-LABEL: cttz_v16i8:
@@ -54,6 +56,14 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0
; LMULMAX8-NEXT: vse8.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: cttz_v16i8:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; ZVBB-NEXT: vle8.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse8.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = load <16 x i8>, ptr %y
%c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
@@ -262,6 +272,14 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0
; LMULMAX8-NEXT: vse16.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: cttz_v8i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = load <8 x i16>, ptr %y
%c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
@@ -419,6 +437,14 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: cttz_v4i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = load <4 x i32>, ptr %y
%c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
@@ -640,6 +666,14 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind {
; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v9, a1, v0
; LMULMAX8-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX8-RV64-NEXT: ret
+;
+; ZVBB-LABEL: cttz_v2i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = load <2 x i64>, ptr %y
%c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
@@ -731,6 +765,15 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0
; LMULMAX8-NEXT: vse8.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: cttz_v32i8:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: li a1, 32
+; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; ZVBB-NEXT: vle8.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse8.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
@@ -911,6 +954,14 @@ define void @cttz_v16i16(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0
; LMULMAX8-NEXT: vse16.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: cttz_v16i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
@@ -1070,6 +1121,14 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: cttz_v8i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
@@ -1291,6 +1350,14 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v10, a1, v0
; LMULMAX8-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX8-RV64-NEXT: ret
+;
+; ZVBB-LABEL: cttz_v4i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
@@ -1339,6 +1406,14 @@ define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vsub.vx v8, v10, a1
; LMULMAX8-NEXT: vse8.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: cttz_zero_undef_v16i8:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; ZVBB-NEXT: vle8.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse8.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = load <16 x i8>, ptr %y
%c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
@@ -1531,6 +1606,14 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vsub.vx v8, v8, a1
; LMULMAX8-NEXT: vse16.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: cttz_zero_undef_v8i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = load <8 x i16>, ptr %y
%c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
@@ -1672,6 +1755,14 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vsub.vx v8, v8, a1
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: cttz_zero_undef_v4i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = load <4 x i32>, ptr %y
%c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
@@ -1873,6 +1964,14 @@ define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
; LMULMAX8-RV64-NEXT: vsub.vx v8, v8, a1
; LMULMAX8-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX8-RV64-NEXT: ret
+;
+; ZVBB-LABEL: cttz_zero_undef_v2i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = load <2 x i64>, ptr %y
%c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
@@ -1961,6 +2060,15 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vsub.vx v8, v12, a1
; LMULMAX8-NEXT: vse8.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: cttz_zero_undef_v32i8:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: li a1, 32
+; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; ZVBB-NEXT: vle8.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse8.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
@@ -2137,6 +2245,14 @@ define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vsub.vx v8, v8, a1
; LMULMAX8-NEXT: vse16.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: cttz_zero_undef_v16i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse16.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
@@ -2280,6 +2396,14 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
; LMULMAX8-NEXT: vsub.vx v8, v8, a1
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
+;
+; ZVBB-LABEL: cttz_zero_undef_v8i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVBB-NEXT: vle32.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse32.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
@@ -2481,6 +2605,14 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
; LMULMAX8-RV64-NEXT: vsub.vx v8, v8, a1
; LMULMAX8-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX8-RV64-NEXT: ret
+;
+; ZVBB-LABEL: cttz_zero_undef_v4i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVBB-NEXT: vle64.v v8, (a0)
+; ZVBB-NEXT: vctz.v v8, v8
+; ZVBB-NEXT: vse64.v v8, (a0)
+; ZVBB-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)