[llvm] 65ffcc0 - [RISCV] Lower VP_CTLZ_ZERO_UNDEF/VP_CTTZ_ZERO_UNDEF/VP_CTLZ by converting to FP and extracting the exponent.
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 18 00:26:08 PDT 2023
Author: LiaoChunyu
Date: 2023-07-18T15:25:59+08:00
New Revision: 65ffcc099c7f91a15e57b013e2713fa1a49540d2
URL: https://github.com/llvm/llvm-project/commit/65ffcc099c7f91a15e57b013e2713fa1a49540d2
DIFF: https://github.com/llvm/llvm-project/commit/65ffcc099c7f91a15e57b013e2713fa1a49540d2.diff
LOG: [RISCV] Lower VP_CTLZ_ZERO_UNDEF/VP_CTTZ_ZERO_UNDEF/VP_CTLZ by converting to FP and extracting the exponent.
D111904 and D141585 taught the RISC-V backend to custom-lower the vector ISD::CTLZ_ZERO_UNDEF/CTTZ_ZERO_UNDEF/CTLZ
nodes by converting to float and extracting the exponent from the result.
The VP_CTLZ_ZERO_UNDEF/VP_CTTZ_ZERO_UNDEF/VP_CTLZ nodes can reuse the same lowering; a scalar sketch of the trick follows.
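For readers who want to see the arithmetic, here is a minimal scalar sketch of the exponent trick for i8
elements. It is purely illustrative and assumes nothing beyond IEEE-754 f32; the helper names (exponentOf,
ctlz8, cttz8_zero_undef) are made up for this sketch and do not exist in the patch, which builds the
equivalent SelectionDAG nodes instead.

#include <cstdint>
#include <cstring>
#include <cassert>

// Biased exponent of the f32 value of X. UINT_TO_FP is exact for i8 inputs;
// the real lowering switches to an RTZ conversion when rounding could matter.
static unsigned exponentOf(uint32_t X) {
  float F = static_cast<float>(X);
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // bitcast f32 -> i32
  return Bits >> 23;                    // ShiftAmt = 23 for f32
}

// CTLZ for an i8 element: subtract the exponent from Bias + (EltSize - 1).
unsigned ctlz8(uint8_t X) {
  unsigned Adjust = 127 + (8 - 1);      // 134, the constant seen in the ctlz tests
  unsigned Res = Adjust - exponentOf(X);
  return Res > 8 ? 8 : Res;             // the vminu.vx clamp handles X == 0
}

// CTTZ_ZERO_UNDEF for an i8 element: isolate the lowest set bit, then the
// exponent minus the bias is the number of trailing zeros.
unsigned cttz8_zero_undef(uint8_t X) {
  assert(X != 0 && "result is undefined for zero input");
  uint8_t Lowest = X & (0 - X);         // x & -x keeps only the low set bit
  return exponentOf(Lowest) - 127;
}

The masked VP forms in the patch follow the same recipe, but each step is built with the corresponding VP_*
opcode so the mask and EVL operands are threaded through.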
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D155150
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8384f2958ddbd6..9ad9d9354d9da2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -765,9 +765,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// range of f32.
EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
if (isTypeLegal(FloatVT)) {
- setOperationAction(
- {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
- Custom);
+ setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
+ ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
+ ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
+ VT, Custom);
}
}
}
@@ -4285,6 +4286,16 @@ RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
unsigned EltSize = VT.getScalarSizeInBits();
SDValue Src = Op.getOperand(0);
SDLoc DL(Op);
+ MVT ContainerVT = VT;
+
+ SDValue Mask, VL;
+ if (Op->isVPOpcode()) {
+ Mask = Op.getOperand(1);
+ if (VT.isFixedLengthVector())
+ Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
+ Subtarget);
+ VL = Op.getOperand(2);
+ }
// We choose FP type that can represent the value if possible. Otherwise, we
// use rounding to zero conversion for correct exponent of the result.
@@ -4305,21 +4316,27 @@ RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
SDValue Neg = DAG.getNegative(Src, DL, VT);
Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
+ } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
+ SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Src, Mask, VL);
+ Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
}
// We have a legal FP type, convert to it.
SDValue FloatVal;
if (FloatVT.bitsGT(VT)) {
- FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
+ if (Op->isVPOpcode())
+ FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
+ else
+ FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
} else {
// Use RTZ to avoid rounding influencing exponent of FloatVal.
- MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
}
-
- auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+ if (!Op->isVPOpcode())
+ std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
SDValue RTZRM =
DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
MVT ContainerFloatVT =
@@ -4333,30 +4350,49 @@ RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
- SDValue Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
- DAG.getConstant(ShiftAmt, DL, IntVT));
+
+ SDValue Exp;
// Restore back to original type. Truncation after SRL is to generate vnsrl.
- if (IntVT.bitsLT(VT))
- Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
- else if (IntVT.bitsGT(VT))
- Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
+ if (Op->isVPOpcode()) {
+ Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
+ DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
+ Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
+ } else {
+ Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
+ DAG.getConstant(ShiftAmt, DL, IntVT));
+ if (IntVT.bitsLT(VT))
+ Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
+ else if (IntVT.bitsGT(VT))
+ Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
+ }
+
// The exponent contains log2 of the value in biased form.
unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
-
// For trailing zeros, we just need to subtract the bias.
if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
return DAG.getNode(ISD::SUB, DL, VT, Exp,
DAG.getConstant(ExponentBias, DL, VT));
+ if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
+ return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
+ DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
// For leading zeros, we need to remove the bias and convert from log2 to
// leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
unsigned Adjust = ExponentBias + (EltSize - 1);
- SDValue Res =
- DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
+ SDValue Res;
+ if (Op->isVPOpcode())
+ Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
+ Mask, VL);
+ else
+ Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
+
// The above result with zero input equals to Adjust which is greater than
// EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
if (Op.getOpcode() == ISD::CTLZ)
Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
+ else if (Op.getOpcode() == ISD::VP_CTLZ)
+ Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
+ DAG.getConstant(EltSize, DL, VT), Mask, VL);
return Res;
}
@@ -5440,10 +5476,14 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerVPOp(Op, DAG, RISCVISD::BSWAP_VL, /*HasMergeOp*/ true);
case ISD::VP_CTLZ:
case ISD::VP_CTLZ_ZERO_UNDEF:
- return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true);
+ if (Subtarget.hasStdExtZvbb())
+ return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true);
+ return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
case ISD::VP_CTTZ:
case ISD::VP_CTTZ_ZERO_UNDEF:
- return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, /*HasMergeOp*/ true);
+ if (Subtarget.hasStdExtZvbb())
+ return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, /*HasMergeOp*/ true);
+ return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
case ISD::VP_CTPOP:
return lowerVPOp(Op, DAG, RISCVISD::CTPOP_VL, /*HasMergeOp*/ true);
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
index 5ee4cb105da448..5dc5ced40e8f2b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -13,26 +13,19 @@ declare <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vs
define <vscale x 1 x i8> @vp_ctlz_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 8
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i8:
@@ -47,26 +40,16 @@ define <vscale x 1 x i8> @vp_ctlz_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1
define <vscale x 1 x i8> @vp_ctlz_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv1i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 2
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vnot.v v8, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 8
+; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i8_unmasked:
@@ -85,26 +68,19 @@ declare <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8>, i1 immarg, <vs
define <vscale x 2 x i8> @vp_ctlz_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 8
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i8:
@@ -119,26 +95,16 @@ define <vscale x 2 x i8> @vp_ctlz_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1
define <vscale x 2 x i8> @vp_ctlz_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv2i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 2
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vnot.v v8, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 8
+; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i8_unmasked:
@@ -157,26 +123,19 @@ declare <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8>, i1 immarg, <vs
define <vscale x 4 x i8> @vp_ctlz_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 8
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i8:
@@ -191,26 +150,16 @@ define <vscale x 4 x i8> @vp_ctlz_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1
define <vscale x 4 x i8> @vp_ctlz_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv4i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 2
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vnot.v v8, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 8
+; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i8_unmasked:
@@ -229,26 +178,19 @@ declare <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8>, i1 immarg, <vs
define <vscale x 8 x i8> @vp_ctlz_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v10, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v12, 0, v0.t
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 8
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i8:
@@ -263,26 +205,16 @@ define <vscale x 8 x i8> @vp_ctlz_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1
define <vscale x 8 x i8> @vp_ctlz_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv8i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 2
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vnot.v v8, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v10
+; CHECK-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v10, a0
+; CHECK-NEXT: li a0, 8
+; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i8_unmasked:
@@ -301,26 +233,19 @@ declare <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8>, i1 immarg,
define <vscale x 16 x i8> @vp_ctlz_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv16i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
-; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
-; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v12, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 8
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i8:
@@ -335,26 +260,16 @@ define <vscale x 16 x i8> @vp_ctlz_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16
define <vscale x 16 x i8> @vp_ctlz_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv16i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vsrl.vi v10, v8, 1
-; CHECK-NEXT: vor.vv v8, v8, v10
-; CHECK-NEXT: vsrl.vi v10, v8, 2
-; CHECK-NEXT: vor.vv v8, v8, v10
-; CHECK-NEXT: vsrl.vi v10, v8, 4
-; CHECK-NEXT: vor.vv v8, v8, v10
-; CHECK-NEXT: vnot.v v8, v8
-; CHECK-NEXT: vsrl.vi v10, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v10, v10, a0
-; CHECK-NEXT: vsub.vv v8, v8, v10
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v10, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v10, v8
-; CHECK-NEXT: vsrl.vi v10, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v10
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v12
+; CHECK-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v12, a0
+; CHECK-NEXT: li a0, 8
+; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i8_unmasked:
@@ -515,71 +430,19 @@ define <vscale x 64 x i8> @vp_ctlz_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32
declare <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
define <vscale x 1 x i16> @vp_ctlz_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i16:
; CHECK-ZVBB: # %bb.0:
@@ -591,71 +454,16 @@ define <vscale x 1 x i16> @vp_ctlz_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x
}
define <vscale x 1 x i16> @vp_ctlz_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv1i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv1i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -671,71 +479,19 @@ define <vscale x 1 x i16> @vp_ctlz_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32
declare <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
define <vscale x 2 x i16> @vp_ctlz_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i16:
; CHECK-ZVBB: # %bb.0:
@@ -747,71 +503,16 @@ define <vscale x 2 x i16> @vp_ctlz_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x
}
define <vscale x 2 x i16> @vp_ctlz_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv2i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv2i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -827,71 +528,19 @@ define <vscale x 2 x i16> @vp_ctlz_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32
declare <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
define <vscale x 4 x i16> @vp_ctlz_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i16:
; CHECK-ZVBB: # %bb.0:
@@ -903,71 +552,16 @@ define <vscale x 4 x i16> @vp_ctlz_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x
}
define <vscale x 4 x i16> @vp_ctlz_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv4i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv4i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -983,71 +577,19 @@ define <vscale x 4 x i16> @vp_ctlz_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32
declare <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
define <vscale x 8 x i16> @vp_ctlz_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i16:
; CHECK-ZVBB: # %bb.0:
@@ -1059,71 +601,16 @@ define <vscale x 8 x i16> @vp_ctlz_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x
}
define <vscale x 8 x i16> @vp_ctlz_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv8i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv8i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1139,71 +626,19 @@ define <vscale x 8 x i16> @vp_ctlz_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32
declare <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
define <vscale x 16 x i16> @vp_ctlz_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i16:
; CHECK-ZVBB: # %bb.0:
@@ -1215,71 +650,16 @@ define <vscale x 16 x i16> @vp_ctlz_nxv16i16(<vscale x 16 x i16> %va, <vscale x
}
define <vscale x 16 x i16> @vp_ctlz_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv16i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv16i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1451,77 +831,20 @@ define <vscale x 32 x i16> @vp_ctlz_nxv32i16_unmasked(<vscale x 32 x i16> %va, i
declare <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
define <vscale x 1 x i32> @vp_ctlz_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v9, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i32:
; CHECK-ZVBB: # %bb.0:
@@ -1533,77 +856,17 @@ define <vscale x 1 x i32> @vp_ctlz_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x
}
define <vscale x 1 x i32> @vp_ctlz_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv1i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv1i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v9, a0
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1619,77 +882,20 @@ define <vscale x 1 x i32> @vp_ctlz_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32
declare <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
define <vscale x 2 x i32> @vp_ctlz_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v10, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i32:
; CHECK-ZVBB: # %bb.0:
@@ -1701,77 +907,17 @@ define <vscale x 2 x i32> @vp_ctlz_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x
}
define <vscale x 2 x i32> @vp_ctlz_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v10, a0
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1787,77 +933,20 @@ define <vscale x 2 x i32> @vp_ctlz_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32
declare <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
define <vscale x 4 x i32> @vp_ctlz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v12, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i32:
; CHECK-ZVBB: # %bb.0:
@@ -1869,77 +958,17 @@ define <vscale x 4 x i32> @vp_ctlz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x
}
define <vscale x 4 x i32> @vp_ctlz_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v12, a0
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1955,77 +984,20 @@ define <vscale x 4 x i32> @vp_ctlz_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32
declare <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
define <vscale x 8 x i32> @vp_ctlz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v16, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i32:
; CHECK-ZVBB: # %bb.0:
@@ -2037,77 +1009,17 @@ define <vscale x 8 x i32> @vp_ctlz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x
}
define <vscale x 8 x i32> @vp_ctlz_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v16, a0
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -2123,77 +1035,18 @@ define <vscale x 8 x i32> @vp_ctlz_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32
declare <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
define <vscale x 16 x i32> @vp_ctlz_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT: li a0, 158
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i32:
; CHECK-ZVBB: # %bb.0:
@@ -2205,77 +1058,18 @@ define <vscale x 16 x i32> @vp_ctlz_nxv16i32(<vscale x 16 x i32> %va, <vscale x
}
define <vscale x 16 x i32> @vp_ctlz_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: vsrl.vi v8, v8, 23
+; CHECK-NEXT: li a0, 158
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -2291,120 +1085,19 @@ define <vscale x 16 x i32> @vp_ctlz_nxv16i32_unmasked(<vscale x 16 x i32> %va, i
declare <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
define <vscale x 1 x i64> @vp_ctlz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv1i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v9, v9, v10, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v10, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv1i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i64:
; CHECK-ZVBB: # %bb.0:
@@ -2416,120 +1109,19 @@ define <vscale x 1 x i64> @vp_ctlz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x
}
define <vscale x 1 x i64> @vp_ctlz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv1i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v9, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v9, v9, v10
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v10, v8, v9
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v9
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv1i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v9, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i64_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -2545,120 +1137,19 @@ define <vscale x 1 x i64> @vp_ctlz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32
declare <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
define <vscale x 2 x i64> @vp_ctlz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv2i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v10, v10, v12, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v12, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i64:
; CHECK-ZVBB: # %bb.0:
@@ -2670,120 +1161,19 @@ define <vscale x 2 x i64> @vp_ctlz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x
}
define <vscale x 2 x i64> @vp_ctlz_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv2i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v10, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v10, v10, v12
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v12, v8, v10
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v10
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv2i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v10, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i64_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -2799,120 +1189,19 @@ define <vscale x 2 x i64> @vp_ctlz_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32
declare <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
define <vscale x 4 x i64> @vp_ctlz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv4i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v12, v12, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v16, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv4i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i64:
; CHECK-ZVBB: # %bb.0:
@@ -2924,120 +1213,19 @@ define <vscale x 4 x i64> @vp_ctlz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x
}
define <vscale x 4 x i64> @vp_ctlz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv4i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v12, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v12, v12, v16
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v16, v8, v12
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v12
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv4i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v12, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i64_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -3053,120 +1241,19 @@ define <vscale x 4 x i64> @vp_ctlz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32
declare <vscale x 7 x i64> @llvm.vp.ctlz.nxv7i64(<vscale x 7 x i64>, i1 immarg, <vscale x 7 x i1>, i32)
define <vscale x 7 x i64> @vp_ctlz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv7i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv7i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv7i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv7i64:
; CHECK-ZVBB: # %bb.0:
@@ -3178,120 +1265,19 @@ define <vscale x 7 x i64> @vp_ctlz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x
}
define <vscale x 7 x i64> @vp_ctlz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv7i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv7i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv7i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv7i64_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -3307,120 +1293,19 @@ define <vscale x 7 x i64> @vp_ctlz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32
declare <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
define <vscale x 8 x i64> @vp_ctlz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i64:
; CHECK-ZVBB: # %bb.0:
@@ -3432,120 +1317,19 @@ define <vscale x 8 x i64> @vp_ctlz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x
}
define <vscale x 8 x i64> @vp_ctlz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv8i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv8i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i64_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -3561,339 +1345,40 @@ define <vscale x 8 x i64> @vp_ctlz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32
declare <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
define <vscale x 16 x i64> @vp_ctlz_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv16i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 40
-; RV32-NEXT: mul a1, a1, a2
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb
-; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: srli a2, a1, 3
-; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a2
-; RV32-NEXT: lui a2, 349525
-; RV32-NEXT: addi a2, a2, 1365
-; RV32-NEXT: sw a2, 44(sp)
-; RV32-NEXT: sw a2, 40(sp)
-; RV32-NEXT: lui a2, 209715
-; RV32-NEXT: addi a2, a2, 819
-; RV32-NEXT: sw a2, 36(sp)
-; RV32-NEXT: sw a2, 32(sp)
-; RV32-NEXT: lui a2, 61681
-; RV32-NEXT: addi a2, a2, -241
-; RV32-NEXT: sw a2, 28(sp)
-; RV32-NEXT: sw a2, 24(sp)
-; RV32-NEXT: lui a2, 4112
-; RV32-NEXT: addi a2, a2, 257
-; RV32-NEXT: sw a2, 20(sp)
-; RV32-NEXT: sw a2, 16(sp)
-; RV32-NEXT: sub a2, a0, a1
-; RV32-NEXT: sltu a3, a0, a2
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 2, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 8, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 16, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v24, v16, a2, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vnot.v v16, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
-; RV32-NEXT: addi a4, sp, 40
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vsub.vv v16, v16, v24, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: addi a4, sp, 32
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 3
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 3
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vadd.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t
-; RV32-NEXT: vadd.vv v24, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 24
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vmul.vv v16, v24, v16, v0.t
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: vsrl.vx v16, v16, a3, v0.t
-; RV32-NEXT: addi a4, sp, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: bltu a0, a1, .LBB46_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB46_2:
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmv1r.v v0, v1
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 40
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv16i64:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
-; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: srli a2, a1, 3
-; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a2
-; RV64-NEXT: sub a2, a0, a1
-; RV64-NEXT: sltu a3, a0, a2
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: and a2, a3, a2
-; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: li a2, 32
-; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT: lui a3, 349525
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: vand.vx v8, v8, a3, v0.t
-; RV64-NEXT: vsub.vv v16, v16, v8, v0.t
-; RV64-NEXT: lui a4, 209715
-; RV64-NEXT: addiw a4, a4, 819
-; RV64-NEXT: slli a5, a4, 32
-; RV64-NEXT: add a4, a4, a5
-; RV64-NEXT: vand.vx v8, v16, a4, v0.t
-; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t
-; RV64-NEXT: vand.vx v16, v16, a4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a5, 61681
-; RV64-NEXT: addiw a5, a5, -241
-; RV64-NEXT: slli a6, a5, 32
-; RV64-NEXT: add a5, a5, a6
-; RV64-NEXT: vand.vx v8, v8, a5, v0.t
-; RV64-NEXT: lui a6, 4112
-; RV64-NEXT: addiw a6, a6, 257
-; RV64-NEXT: slli a7, a6, 32
-; RV64-NEXT: add a6, a6, a7
-; RV64-NEXT: vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT: li a7, 56
-; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT: addi t0, sp, 16
-; RV64-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill
-; RV64-NEXT: bltu a0, a1, .LBB46_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB46_2:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vmv1r.v v0, v24
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vand.vx v16, v16, a3, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: vand.vx v16, v8, a4, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a4, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: vand.vx v8, v8, a5, v0.t
-; RV64-NEXT: vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
-; RV64-NEXT: add sp, sp, a0
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 3
+; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: fsrmi a3, 1
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t
+; CHECK-NEXT: fsrm a3
+; CHECK-NEXT: li a2, 52
+; CHECK-NEXT: vsrl.vx v16, v16, a2, v0.t
+; CHECK-NEXT: li a3, 1086
+; CHECK-NEXT: vrsub.vx v16, v16, a3, v0.t
+; CHECK-NEXT: li a4, 64
+; CHECK-NEXT: vminu.vx v16, v16, a4, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB46_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB46_2:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t
+; CHECK-NEXT: vrsub.vx v8, v8, a3, v0.t
+; CHECK-NEXT: vminu.vx v8, v8, a4, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i64:
; CHECK-ZVBB: # %bb.0:
@@ -3921,233 +1406,35 @@ define <vscale x 16 x i64> @vp_ctlz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
}
define <vscale x 16 x i64> @vp_ctlz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv16i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
-; RV32-NEXT: sw a1, 40(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
-; RV32-NEXT: sw a1, 32(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: sub a2, a0, a1
-; RV32-NEXT: sltu a3, a0, a2
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v16, 1
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 2
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 4
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 8
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 16
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v24, v16, a2
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vnot.v v16, v16
-; RV32-NEXT: vsrl.vi v24, v16, 1
-; RV32-NEXT: addi a4, sp, 40
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v0
-; RV32-NEXT: vsub.vv v16, v16, v24
-; RV32-NEXT: addi a4, sp, 32
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v16, v0
-; RV32-NEXT: vsrl.vi v16, v16, 2
-; RV32-NEXT: vand.vv v16, v16, v0
-; RV32-NEXT: vadd.vv v16, v24, v16
-; RV32-NEXT: vsrl.vi v24, v16, 4
-; RV32-NEXT: vadd.vv v16, v16, v24
-; RV32-NEXT: addi a4, sp, 24
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v16, v24
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vmul.vv v24, v24, v16
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: vsrl.vx v16, v24, a3
-; RV32-NEXT: addi a4, sp, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: bltu a0, a1, .LBB47_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB47_2:
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vx v24, v8, a2
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v24, v8, 1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v24, v16
-; RV32-NEXT: vsub.vv v8, v8, v24
-; RV32-NEXT: vand.vv v24, v8, v0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v0
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v24, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v24
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: vsrl.vx v8, v8, a3
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv16i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: sub a2, a0, a1
-; RV64-NEXT: sltu a3, a0, a2
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: and a2, a3, a2
-; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 2
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 4
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 8
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 16
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: li a2, 32
-; RV64-NEXT: vsrl.vx v24, v16, a2
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vnot.v v16, v16
-; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: lui a3, 349525
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vsub.vv v16, v16, v24
-; RV64-NEXT: lui a4, 209715
-; RV64-NEXT: addiw a4, a4, 819
-; RV64-NEXT: slli a5, a4, 32
-; RV64-NEXT: add a4, a4, a5
-; RV64-NEXT: vand.vx v24, v16, a4
-; RV64-NEXT: vsrl.vi v16, v16, 2
-; RV64-NEXT: vand.vx v16, v16, a4
-; RV64-NEXT: vadd.vv v16, v24, v16
-; RV64-NEXT: vsrl.vi v24, v16, 4
-; RV64-NEXT: vadd.vv v16, v16, v24
-; RV64-NEXT: lui a5, 61681
-; RV64-NEXT: addiw a5, a5, -241
-; RV64-NEXT: slli a6, a5, 32
-; RV64-NEXT: add a5, a5, a6
-; RV64-NEXT: vand.vx v16, v16, a5
-; RV64-NEXT: lui a6, 4112
-; RV64-NEXT: addiw a6, a6, 257
-; RV64-NEXT: slli a7, a6, 32
-; RV64-NEXT: add a6, a6, a7
-; RV64-NEXT: vmul.vx v16, v16, a6
-; RV64-NEXT: li a7, 56
-; RV64-NEXT: vsrl.vx v16, v16, a7
-; RV64-NEXT: bltu a0, a1, .LBB47_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB47_2:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vx v24, v8, a2
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vsub.vv v8, v8, v24
-; RV64-NEXT: vand.vx v24, v8, a4
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a4
-; RV64-NEXT: vadd.vv v8, v24, v8
-; RV64-NEXT: vsrl.vi v24, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v24
-; RV64-NEXT: vand.vx v8, v8, a5
-; RV64-NEXT: vmul.vx v8, v8, a6
-; RV64-NEXT: vsrl.vx v8, v8, a7
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: fsrmi a3, 1
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v16, v16
+; CHECK-NEXT: fsrm a3
+; CHECK-NEXT: li a2, 52
+; CHECK-NEXT: vsrl.vx v16, v16, a2
+; CHECK-NEXT: li a3, 1086
+; CHECK-NEXT: vrsub.vx v16, v16, a3
+; CHECK-NEXT: li a4, 64
+; CHECK-NEXT: vminu.vx v16, v16, a4
+; CHECK-NEXT: bltu a0, a1, .LBB47_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB47_2:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: vsrl.vx v8, v8, a2
+; CHECK-NEXT: vrsub.vx v8, v8, a3
+; CHECK-NEXT: vminu.vx v8, v8, a4
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i64_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -4174,26 +1461,17 @@ define <vscale x 16 x i64> @vp_ctlz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i
define <vscale x 1 x i8> @vp_ctlz_zero_undef_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i8:
@@ -4208,26 +1486,14 @@ define <vscale x 1 x i8> @vp_ctlz_zero_undef_nxv1i8(<vscale x 1 x i8> %va, <vsca
define <vscale x 1 x i8> @vp_ctlz_zero_undef_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 2
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vnot.v v8, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i8_unmasked:
@@ -4245,26 +1511,17 @@ define <vscale x 1 x i8> @vp_ctlz_zero_undef_nxv1i8_unmasked(<vscale x 1 x i8> %
define <vscale x 2 x i8> @vp_ctlz_zero_undef_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i8:
@@ -4279,26 +1536,14 @@ define <vscale x 2 x i8> @vp_ctlz_zero_undef_nxv2i8(<vscale x 2 x i8> %va, <vsca
define <vscale x 2 x i8> @vp_ctlz_zero_undef_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 2
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vnot.v v8, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i8_unmasked:
@@ -4316,26 +1561,17 @@ define <vscale x 2 x i8> @vp_ctlz_zero_undef_nxv2i8_unmasked(<vscale x 2 x i8> %
define <vscale x 4 x i8> @vp_ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i8:
@@ -4350,26 +1586,14 @@ define <vscale x 4 x i8> @vp_ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vsca
define <vscale x 4 x i8> @vp_ctlz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 2
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vnot.v v8, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i8_unmasked:
@@ -4387,26 +1611,17 @@ define <vscale x 4 x i8> @vp_ctlz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %
define <vscale x 8 x i8> @vp_ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v10, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v12, 0, v0.t
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i8:
@@ -4421,26 +1636,14 @@ define <vscale x 8 x i8> @vp_ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vsca
define <vscale x 8 x i8> @vp_ctlz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 2
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vnot.v v8, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v10
+; CHECK-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v10, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i8_unmasked:
@@ -4458,26 +1661,17 @@ define <vscale x 8 x i8> @vp_ctlz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %
define <vscale x 16 x i8> @vp_ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
-; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
-; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
-; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v12, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i8:
@@ -4492,26 +1686,14 @@ define <vscale x 16 x i8> @vp_ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <v
define <vscale x 16 x i8> @vp_ctlz_zero_undef_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vsrl.vi v10, v8, 1
-; CHECK-NEXT: vor.vv v8, v8, v10
-; CHECK-NEXT: vsrl.vi v10, v8, 2
-; CHECK-NEXT: vor.vv v8, v8, v10
-; CHECK-NEXT: vsrl.vi v10, v8, 4
-; CHECK-NEXT: vor.vv v8, v8, v10
-; CHECK-NEXT: vnot.v v8, v8
-; CHECK-NEXT: vsrl.vi v10, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v10, v10, a0
-; CHECK-NEXT: vsub.vv v8, v8, v10
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v10, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v10, v8
-; CHECK-NEXT: vsrl.vi v10, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v10
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v12
+; CHECK-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: li a0, 134
+; CHECK-NEXT: vrsub.vx v8, v12, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i8_unmasked:
@@ -4669,71 +1851,17 @@ define <vscale x 64 x i8> @vp_ctlz_zero_undef_nxv64i8_unmasked(<vscale x 64 x i8
define <vscale x 1 x i16> @vp_ctlz_zero_undef_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i16:
; CHECK-ZVBB: # %bb.0:
@@ -4745,71 +1873,14 @@ define <vscale x 1 x i16> @vp_ctlz_zero_undef_nxv1i16(<vscale x 1 x i16> %va, <v
}
define <vscale x 1 x i16> @vp_ctlz_zero_undef_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv1i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv1i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -4824,71 +1895,17 @@ define <vscale x 1 x i16> @vp_ctlz_zero_undef_nxv1i16_unmasked(<vscale x 1 x i16
define <vscale x 2 x i16> @vp_ctlz_zero_undef_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i16:
; CHECK-ZVBB: # %bb.0:
@@ -4900,71 +1917,14 @@ define <vscale x 2 x i16> @vp_ctlz_zero_undef_nxv2i16(<vscale x 2 x i16> %va, <v
}
define <vscale x 2 x i16> @vp_ctlz_zero_undef_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv2i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv2i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -4979,71 +1939,17 @@ define <vscale x 2 x i16> @vp_ctlz_zero_undef_nxv2i16_unmasked(<vscale x 2 x i16
define <vscale x 4 x i16> @vp_ctlz_zero_undef_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i16:
; CHECK-ZVBB: # %bb.0:
@@ -5055,71 +1961,14 @@ define <vscale x 4 x i16> @vp_ctlz_zero_undef_nxv4i16(<vscale x 4 x i16> %va, <v
}
define <vscale x 4 x i16> @vp_ctlz_zero_undef_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv4i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv4i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -5130,151 +1979,40 @@ define <vscale x 4 x i16> @vp_ctlz_zero_undef_nxv4i16_unmasked(<vscale x 4 x i16
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
-}
-
-
-define <vscale x 8 x i16> @vp_ctlz_zero_undef_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i16:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
- ret <vscale x 8 x i16> %v
-}
-
-define <vscale x 8 x i16> @vp_ctlz_zero_undef_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv8i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv8i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+}
+
+
+define <vscale x 8 x i16> @vp_ctlz_zero_undef_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vp_ctlz_zero_undef_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -5289,71 +2027,17 @@ define <vscale x 8 x i16> @vp_ctlz_zero_undef_nxv8i16_unmasked(<vscale x 8 x i16
define <vscale x 16 x i16> @vp_ctlz_zero_undef_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i16:
; CHECK-ZVBB: # %bb.0:
@@ -5365,71 +2049,14 @@ define <vscale x 16 x i16> @vp_ctlz_zero_undef_nxv16i16(<vscale x 16 x i16> %va,
}
define <vscale x 16 x i16> @vp_ctlz_zero_undef_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv16i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv16i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -5599,77 +2226,18 @@ define <vscale x 32 x i16> @vp_ctlz_zero_undef_nxv32i16_unmasked(<vscale x 32 x
define <vscale x 1 x i32> @vp_ctlz_zero_undef_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v9, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i32:
; CHECK-ZVBB: # %bb.0:
@@ -5681,77 +2249,15 @@ define <vscale x 1 x i32> @vp_ctlz_zero_undef_nxv1i32(<vscale x 1 x i32> %va, <v
}
define <vscale x 1 x i32> @vp_ctlz_zero_undef_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv1i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv1i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v9, a0
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -5766,77 +2272,18 @@ define <vscale x 1 x i32> @vp_ctlz_zero_undef_nxv1i32_unmasked(<vscale x 1 x i32
define <vscale x 2 x i32> @vp_ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v10, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i32:
; CHECK-ZVBB: # %bb.0:
@@ -5848,77 +2295,15 @@ define <vscale x 2 x i32> @vp_ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va, <v
}
define <vscale x 2 x i32> @vp_ctlz_zero_undef_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v10, a0
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -5933,77 +2318,18 @@ define <vscale x 2 x i32> @vp_ctlz_zero_undef_nxv2i32_unmasked(<vscale x 2 x i32
define <vscale x 4 x i32> @vp_ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v12, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i32:
; CHECK-ZVBB: # %bb.0:
@@ -6015,162 +2341,41 @@ define <vscale x 4 x i32> @vp_ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va, <v
}
define <vscale x 4 x i32> @vp_ctlz_zero_undef_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-ZVBB-NEXT: vclz.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
- ret <vscale x 4 x i32> %v
-}
-
-
-define <vscale x 8 x i32> @vp_ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v12, a0
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT: vclz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+
+define <vscale x 8 x i32> @vp_ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v16, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i32:
; CHECK-ZVBB: # %bb.0:
@@ -6182,77 +2387,15 @@ define <vscale x 8 x i32> @vp_ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va, <v
}
define <vscale x 8 x i32> @vp_ctlz_zero_undef_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v16, a0
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -6267,77 +2410,16 @@ define <vscale x 8 x i32> @vp_ctlz_zero_undef_nxv8i32_unmasked(<vscale x 8 x i32
define <vscale x 16 x i32> @vp_ctlz_zero_undef_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT: li a0, 158
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i32:
; CHECK-ZVBB: # %bb.0:
@@ -6349,77 +2431,16 @@ define <vscale x 16 x i32> @vp_ctlz_zero_undef_nxv16i32(<vscale x 16 x i32> %va,
}
define <vscale x 16 x i32> @vp_ctlz_zero_undef_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: vsrl.vi v8, v8, 23
+; CHECK-NEXT: li a0, 158
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -6434,120 +2455,17 @@ define <vscale x 16 x i32> @vp_ctlz_zero_undef_nxv16i32_unmasked(<vscale x 16 x
define <vscale x 1 x i64> @vp_ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv1i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v9, v9, v10, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v10, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv1i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i64:
; CHECK-ZVBB: # %bb.0:
@@ -6559,120 +2477,17 @@ define <vscale x 1 x i64> @vp_ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va, <v
}
define <vscale x 1 x i64> @vp_ctlz_zero_undef_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v9, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v9, v9, v10
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v10, v8, v9
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v9
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v9, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -6687,120 +2502,17 @@ define <vscale x 1 x i64> @vp_ctlz_zero_undef_nxv1i64_unmasked(<vscale x 1 x i64
define <vscale x 2 x i64> @vp_ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv2i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v10, v10, v12, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v12, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i64:
; CHECK-ZVBB: # %bb.0:
@@ -6812,120 +2524,17 @@ define <vscale x 2 x i64> @vp_ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va, <v
}
define <vscale x 2 x i64> @vp_ctlz_zero_undef_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v10, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v10, v10, v12
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v12, v8, v10
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v10
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v10, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -6940,120 +2549,17 @@ define <vscale x 2 x i64> @vp_ctlz_zero_undef_nxv2i64_unmasked(<vscale x 2 x i64
define <vscale x 4 x i64> @vp_ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv4i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v12, v12, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v16, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv4i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i64:
; CHECK-ZVBB: # %bb.0:
@@ -7062,123 +2568,20 @@ define <vscale x 4 x i64> @vp_ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va, <v
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i64> %v
-}
-
-define <vscale x 4 x i64> @vp_ctlz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v12, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v12, v12, v16
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v16, v8, v12
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v12
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v12, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+}
+
+define <vscale x 4 x i64> @vp_ctlz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -7193,120 +2596,17 @@ define <vscale x 4 x i64> @vp_ctlz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64
define <vscale x 7 x i64> @vp_ctlz_zero_undef_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv7i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv7i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv7i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv7i64:
; CHECK-ZVBB: # %bb.0:
@@ -7318,120 +2618,17 @@ define <vscale x 7 x i64> @vp_ctlz_zero_undef_nxv7i64(<vscale x 7 x i64> %va, <v
}
define <vscale x 7 x i64> @vp_ctlz_zero_undef_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -7446,120 +2643,17 @@ define <vscale x 7 x i64> @vp_ctlz_zero_undef_nxv7i64_unmasked(<vscale x 7 x i64
define <vscale x 8 x i64> @vp_ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i64:
; CHECK-ZVBB: # %bb.0:
@@ -7571,120 +2665,17 @@ define <vscale x 8 x i64> @vp_ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va, <v
}
define <vscale x 8 x i64> @vp_ctlz_zero_undef_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -7698,339 +2689,37 @@ define <vscale x 8 x i64> @vp_ctlz_zero_undef_nxv8i64_unmasked(<vscale x 8 x i64
}
define <vscale x 16 x i64> @vp_ctlz_zero_undef_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv16i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 40
-; RV32-NEXT: mul a1, a1, a2
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb
-; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: srli a2, a1, 3
-; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a2
-; RV32-NEXT: lui a2, 349525
-; RV32-NEXT: addi a2, a2, 1365
-; RV32-NEXT: sw a2, 44(sp)
-; RV32-NEXT: sw a2, 40(sp)
-; RV32-NEXT: lui a2, 209715
-; RV32-NEXT: addi a2, a2, 819
-; RV32-NEXT: sw a2, 36(sp)
-; RV32-NEXT: sw a2, 32(sp)
-; RV32-NEXT: lui a2, 61681
-; RV32-NEXT: addi a2, a2, -241
-; RV32-NEXT: sw a2, 28(sp)
-; RV32-NEXT: sw a2, 24(sp)
-; RV32-NEXT: lui a2, 4112
-; RV32-NEXT: addi a2, a2, 257
-; RV32-NEXT: sw a2, 20(sp)
-; RV32-NEXT: sw a2, 16(sp)
-; RV32-NEXT: sub a2, a0, a1
-; RV32-NEXT: sltu a3, a0, a2
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 2, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 8, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 16, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v24, v16, a2, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vnot.v v16, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
-; RV32-NEXT: addi a4, sp, 40
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vsub.vv v16, v16, v24, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: addi a4, sp, 32
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 3
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 3
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vadd.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t
-; RV32-NEXT: vadd.vv v24, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 24
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vmul.vv v16, v24, v16, v0.t
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: vsrl.vx v16, v16, a3, v0.t
-; RV32-NEXT: addi a4, sp, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: bltu a0, a1, .LBB94_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB94_2:
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmv1r.v v0, v1
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 40
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv16i64:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
-; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: srli a2, a1, 3
-; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a2
-; RV64-NEXT: sub a2, a0, a1
-; RV64-NEXT: sltu a3, a0, a2
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: and a2, a3, a2
-; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: li a2, 32
-; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT: lui a3, 349525
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: vand.vx v8, v8, a3, v0.t
-; RV64-NEXT: vsub.vv v16, v16, v8, v0.t
-; RV64-NEXT: lui a4, 209715
-; RV64-NEXT: addiw a4, a4, 819
-; RV64-NEXT: slli a5, a4, 32
-; RV64-NEXT: add a4, a4, a5
-; RV64-NEXT: vand.vx v8, v16, a4, v0.t
-; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t
-; RV64-NEXT: vand.vx v16, v16, a4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a5, 61681
-; RV64-NEXT: addiw a5, a5, -241
-; RV64-NEXT: slli a6, a5, 32
-; RV64-NEXT: add a5, a5, a6
-; RV64-NEXT: vand.vx v8, v8, a5, v0.t
-; RV64-NEXT: lui a6, 4112
-; RV64-NEXT: addiw a6, a6, 257
-; RV64-NEXT: slli a7, a6, 32
-; RV64-NEXT: add a6, a6, a7
-; RV64-NEXT: vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT: li a7, 56
-; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT: addi t0, sp, 16
-; RV64-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill
-; RV64-NEXT: bltu a0, a1, .LBB94_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB94_2:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vmv1r.v v0, v24
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vand.vx v16, v16, a3, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: vand.vx v16, v8, a4, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a4, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: vand.vx v8, v8, a5, v0.t
-; RV64-NEXT: vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
-; RV64-NEXT: add sp, sp, a0
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 3
+; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: fsrmi a3, 1
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t
+; CHECK-NEXT: fsrm a3
+; CHECK-NEXT: li a2, 52
+; CHECK-NEXT: vsrl.vx v16, v16, a2, v0.t
+; CHECK-NEXT: li a3, 1086
+; CHECK-NEXT: vrsub.vx v16, v16, a3, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB94_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB94_2:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t
+; CHECK-NEXT: vrsub.vx v8, v8, a3, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i64:
; CHECK-ZVBB: # %bb.0:
@@ -8058,233 +2747,32 @@ define <vscale x 16 x i64> @vp_ctlz_zero_undef_nxv16i64(<vscale x 16 x i64> %va,
}
define <vscale x 16 x i64> @vp_ctlz_zero_undef_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
-; RV32-NEXT: sw a1, 40(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
-; RV32-NEXT: sw a1, 32(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: sub a2, a0, a1
-; RV32-NEXT: sltu a3, a0, a2
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v16, 1
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 2
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 4
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 8
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 16
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v24, v16, a2
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vnot.v v16, v16
-; RV32-NEXT: vsrl.vi v24, v16, 1
-; RV32-NEXT: addi a4, sp, 40
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v0
-; RV32-NEXT: vsub.vv v16, v16, v24
-; RV32-NEXT: addi a4, sp, 32
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v16, v0
-; RV32-NEXT: vsrl.vi v16, v16, 2
-; RV32-NEXT: vand.vv v16, v16, v0
-; RV32-NEXT: vadd.vv v16, v24, v16
-; RV32-NEXT: vsrl.vi v24, v16, 4
-; RV32-NEXT: vadd.vv v16, v16, v24
-; RV32-NEXT: addi a4, sp, 24
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v16, v24
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vmul.vv v24, v24, v16
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: vsrl.vx v16, v24, a3
-; RV32-NEXT: addi a4, sp, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: bltu a0, a1, .LBB95_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB95_2:
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vx v24, v8, a2
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v24, v8, 1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v24, v16
-; RV32-NEXT: vsub.vv v8, v8, v24
-; RV32-NEXT: vand.vv v24, v8, v0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v0
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v24, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v24
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: vsrl.vx v8, v8, a3
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: sub a2, a0, a1
-; RV64-NEXT: sltu a3, a0, a2
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: and a2, a3, a2
-; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 2
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 4
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 8
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 16
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: li a2, 32
-; RV64-NEXT: vsrl.vx v24, v16, a2
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vnot.v v16, v16
-; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: lui a3, 349525
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vsub.vv v16, v16, v24
-; RV64-NEXT: lui a4, 209715
-; RV64-NEXT: addiw a4, a4, 819
-; RV64-NEXT: slli a5, a4, 32
-; RV64-NEXT: add a4, a4, a5
-; RV64-NEXT: vand.vx v24, v16, a4
-; RV64-NEXT: vsrl.vi v16, v16, 2
-; RV64-NEXT: vand.vx v16, v16, a4
-; RV64-NEXT: vadd.vv v16, v24, v16
-; RV64-NEXT: vsrl.vi v24, v16, 4
-; RV64-NEXT: vadd.vv v16, v16, v24
-; RV64-NEXT: lui a5, 61681
-; RV64-NEXT: addiw a5, a5, -241
-; RV64-NEXT: slli a6, a5, 32
-; RV64-NEXT: add a5, a5, a6
-; RV64-NEXT: vand.vx v16, v16, a5
-; RV64-NEXT: lui a6, 4112
-; RV64-NEXT: addiw a6, a6, 257
-; RV64-NEXT: slli a7, a6, 32
-; RV64-NEXT: add a6, a6, a7
-; RV64-NEXT: vmul.vx v16, v16, a6
-; RV64-NEXT: li a7, 56
-; RV64-NEXT: vsrl.vx v16, v16, a7
-; RV64-NEXT: bltu a0, a1, .LBB95_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB95_2:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vx v24, v8, a2
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vsub.vv v8, v8, v24
-; RV64-NEXT: vand.vx v24, v8, a4
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a4
-; RV64-NEXT: vadd.vv v8, v24, v8
-; RV64-NEXT: vsrl.vi v24, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v24
-; RV64-NEXT: vand.vx v8, v8, a5
-; RV64-NEXT: vmul.vx v8, v8, a6
-; RV64-NEXT: vsrl.vx v8, v8, a7
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: fsrmi a3, 1
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v16, v16
+; CHECK-NEXT: fsrm a3
+; CHECK-NEXT: li a2, 52
+; CHECK-NEXT: vsrl.vx v16, v16, a2
+; CHECK-NEXT: li a3, 1086
+; CHECK-NEXT: vrsub.vx v16, v16, a3
+; CHECK-NEXT: bltu a0, a1, .LBB95_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB95_2:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: vsrl.vx v8, v8, a2
+; CHECK-NEXT: vrsub.vx v8, v8, a3
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked:
; CHECK-ZVBB: # %bb.0:
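The nxv16i64 checks above show the new lowering directly: the input is converted to f64 under round-toward-zero (the fsrmi 1 / fsrm pair), the biased exponent is shifted down from bit 52, and ctlz is 1086 minus that exponent (1086 = 1023 + 63). As a purely illustrative scalar sketch in C++ (the helper name and the fenv handling are assumptions for exposition, not code from this patch):

#include <cfenv>
#include <cstdint>
#include <cstring>

// ctlz of a nonzero 64-bit value via the f64 exponent, mirroring the checks above.
unsigned ctlz64_via_f64(uint64_t X) {
  const int OldRM = std::fegetround();
  std::fesetround(FE_TOWARDZERO);       // same role as fsrmi 1 ... fsrm; strictly,
                                        // honoring this may also need FENV_ACCESS
  double D = static_cast<double>(X);    // vfcvt.f.xu.v; RTZ keeps the exponent exact
  std::fesetround(OldRM);
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  return 1086u - static_cast<unsigned>(Bits >> 52);  // vsrl.vx 52, then vrsub.vx 1086
}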
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
index 38647bb21f7f60..afe2d5cdb1c5f8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -3577,23 +3577,20 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i
define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i8:
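The rewritten cttz checks above use the same exponent trick for small elements: vrsub.vi plus vand.vv computes x & -x to isolate the lowest set bit, the widening convert to f32 is exact because that bit is a power of two, and the trailing-zero count falls out as the biased exponent minus 127. A rough scalar model for the i8 case, assuming a nonzero input as the zero-undef form does (the helper is invented for illustration only):

#include <cstdint>
#include <cstring>

// cttz of a nonzero 8-bit value via the f32 exponent, mirroring the nxv1i8 checks.
unsigned cttz8_via_f32(uint8_t X) {
  uint32_t LSB = X & (0u - X);          // vrsub.vi 0 + vand.vv: isolate the lowest set bit
  float F = static_cast<float>(LSB);    // vzext.vf2 + vfwcvt.f.xu.v; exact, LSB is a power of two
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return (Bits >> 23) - 127;            // vsrl.vi 23 / vnsrl.wi, then subtract the f32 bias
}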
@@ -3608,23 +3605,17 @@ define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va, <vsca
define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv1i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1
-; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vrsub.vi v9, v8, 0
; CHECK-NEXT: vand.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i8_unmasked:
@@ -3642,23 +3633,20 @@ define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8_unmasked(<vscale x 1 x i8> %
define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i8:
@@ -3673,23 +3661,17 @@ define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va, <vsca
define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv2i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1
-; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vrsub.vi v9, v8, 0
; CHECK-NEXT: vand.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i8_unmasked:
@@ -3707,23 +3689,20 @@ define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8_unmasked(<vscale x 2 x i8> %
define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i8:
@@ -3738,23 +3717,17 @@ define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vsca
define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv4i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1
-; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vrsub.vi v9, v8, 0
; CHECK-NEXT: vand.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i8_unmasked:
@@ -3772,23 +3745,20 @@ define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %
define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v10, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v12, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i8:
@@ -3803,23 +3773,17 @@ define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vsca
define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv8i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1
-; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vrsub.vi v9, v8, 0
; CHECK-NEXT: vand.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v10
+; CHECK-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v10, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i8_unmasked:
@@ -3837,23 +3801,20 @@ define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %
define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv16i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
-; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v12, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i8:
@@ -3868,23 +3829,17 @@ define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <v
define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv16i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vsub.vx v10, v8, a1
-; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vrsub.vi v10, v8, 0
; CHECK-NEXT: vand.vv v8, v8, v10
-; CHECK-NEXT: vsrl.vi v10, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v10, v10, a0
-; CHECK-NEXT: vsub.vv v8, v8, v10
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v10, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v10, v8
-; CHECK-NEXT: vsrl.vi v10, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v10
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v12
+; CHECK-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v12, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i8_unmasked:
@@ -4030,61 +3985,19 @@ define <vscale x 64 x i8> @vp_cttz_zero_undef_nxv64i8_unmasked(<vscale x 64 x i8
define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i16:
; CHECK-ZVBB: # %bb.0:
@@ -4096,61 +4009,16 @@ define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va, <v
}
define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -4165,26 +4033,218 @@ define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16_unmasked(<vscale x 1 x i16
define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv2i16:
+; CHECK-LABEL: vp_cttz_zero_undef_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+
+define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+
+define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vrsub.vi v10, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v10
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+
+define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v12, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v12
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+
+define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_cttz_zero_undef_nxv32i16:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
+; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV32-NEXT: vand.vv v8, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: lui a0, 5
; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV32-NEXT: vand.vx v16, v16, a0, v0.t
+; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
; RV32-NEXT: lui a0, 3
; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
+; RV32-NEXT: vand.vx v16, v8, a0, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
+; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: lui a0, 1
; RV32-NEXT: addi a0, a0, -241
; RV32-NEXT: vand.vx v8, v8, a0, v0.t
@@ -4193,26 +4253,26 @@ define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va, <v
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: vp_cttz_zero_undef_nxv2i16:
+; RV64-LABEL: vp_cttz_zero_undef_nxv32i16:
; RV64: # %bb.0:
; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
+; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; RV64-NEXT: vsub.vx v16, v8, a1, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV64-NEXT: vand.vv v8, v8, v16, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: lui a0, 5
; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV64-NEXT: vand.vx v16, v16, a0, v0.t
+; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
; RV64-NEXT: lui a0, 3
; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
+; RV64-NEXT: vand.vx v16, v8, a0, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
+; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: lui a0, 1
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
@@ -4221,36 +4281,36 @@ define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va, <v
; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
- ret <vscale x 2 x i16> %v
+ %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
}
-define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked:
+define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
+; RV32-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
+; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; RV32-NEXT: vsub.vx v16, v8, a1
; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
+; RV32-NEXT: vand.vv v8, v8, v16
+; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: lui a0, 5
; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
+; RV32-NEXT: vand.vx v16, v16, a0
+; RV32-NEXT: vsub.vv v8, v8, v16
; RV32-NEXT: lui a0, 3
; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
+; RV32-NEXT: vand.vx v16, v8, a0
; RV32-NEXT: vsrl.vi v8, v8, 2
; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
+; RV32-NEXT: vadd.vv v8, v16, v8
+; RV32-NEXT: vsrl.vi v16, v8, 4
+; RV32-NEXT: vadd.vv v8, v8, v16
; RV32-NEXT: lui a0, 1
; RV32-NEXT: addi a0, a0, -241
; RV32-NEXT: vand.vx v8, v8, a0
@@ -4259,26 +4319,26 @@ define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16_unmasked(<vscale x 2 x i16
; RV32-NEXT: vsrl.vi v8, v8, 8
; RV32-NEXT: ret
;
-; RV64-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked:
+; RV64-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
+; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; RV64-NEXT: vsub.vx v16, v8, a1
; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
+; RV64-NEXT: vand.vv v8, v8, v16
+; RV64-NEXT: vsrl.vi v16, v8, 1
; RV64-NEXT: lui a0, 5
; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
+; RV64-NEXT: vand.vx v16, v16, a0
+; RV64-NEXT: vsub.vv v8, v8, v16
; RV64-NEXT: lui a0, 3
; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
+; RV64-NEXT: vand.vx v16, v8, a0
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
+; RV64-NEXT: vadd.vv v8, v16, v8
+; RV64-NEXT: vsrl.vi v16, v8, 4
+; RV64-NEXT: vadd.vv v8, v8, v16
; RV64-NEXT: lui a0, 1
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: vand.vx v8, v8, a0
@@ -4287,2596 +4347,581 @@ define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16_unmasked(<vscale x 2 x i16
; RV64-NEXT: vsrl.vi v8, v8, 8
; RV64-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8
; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
- ret <vscale x 2 x i16> %v
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
}
-define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+define <vscale x 1 x i32> @vp_cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v9, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
- ret <vscale x 4 x i16> %v
+ %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
}
-define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+define <vscale x 1 x i32> @vp_cttz_zero_undef_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v9, a0
+; CHECK-NEXT: li a0, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8
; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
- ret <vscale x 4 x i16> %v
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
}
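For 32-bit elements the checks above widen to f64 instead, so the exponent field starts at bit 52 and the bias is 1023; the masked form keeps vsrl.vx followed by a narrowing shift, while the unmasked form folds both into a single vnsrl.wx. A corresponding scalar sketch (illustrative only, not code from this patch):

#include <cstdint>
#include <cstring>

// cttz of a nonzero 32-bit value via the f64 exponent, mirroring the nxv1i32/nxv2i32 checks.
unsigned cttz32_via_f64(uint32_t X) {
  uint32_t LSB = X & (0u - X);          // vrsub.vi 0 + vand.vv: isolate the lowest set bit
  double D = static_cast<double>(LSB);  // vfwcvt.f.xu.v to f64; exact for a power of two
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  return static_cast<unsigned>((Bits >> 52) - 1023);  // vsrl.vx 52 / vnsrl.wx, then -1023
}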
-define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v10, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1023
+; CHECK-NEXT: vsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
- ret <vscale x 8 x i16> %v
+ %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
}
-define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v10, a0
+; CHECK-NEXT: li a0, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8
; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
- ret <vscale x 8 x i16> %v
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
}
-define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
+define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v12, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1023
+; CHECK-NEXT: vsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT: ret
;
-; RV64-LABEL: vp_cttz_zero_undef_nxv16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vrsub.vi v10, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v10
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v12, a0
+; CHECK-NEXT: li a0, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+
+define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v16, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1023
+; CHECK-NEXT: vsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
- ret <vscale x 16 x i16> %v
+ %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
}
-define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
+define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v12, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v12
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v16, a0
+; CHECK-NEXT: li a0, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
;
-; RV64-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+
+define <vscale x 16 x i32> @vp_cttz_zero_undef_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT: li a1, 127
+; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vp_cttz_zero_undef_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v16, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v16
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: vsrl.vi v8, v8, 23
+; CHECK-NEXT: li a1, 127
+; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
- %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
- ret <vscale x 16 x i16> %v
+ %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
}
-define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a1, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t
+; CHECK-NEXT: li a1, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
- ret <vscale x 32 x i16> %v
+ %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
}
-define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a1, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a1
+; CHECK-NEXT: li a1, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8
; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
- %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
- ret <vscale x 32 x i16> %v
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
}
-define <vscale x 1 x i32> @vp_cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a1, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t
+; CHECK-NEXT: li a1, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
- ret <vscale x 1 x i32> %v
+ %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
}
-define <vscale x 1 x i32> @vp_cttz_zero_undef_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vrsub.vi v10, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v10
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a1, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a1
+; CHECK-NEXT: li a1, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8
; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
- ret <vscale x 1 x i32> %v
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
}
-define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
+define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v12, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a1, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t
+; CHECK-NEXT: li a1, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
;
-; RV64-LABEL: vp_cttz_zero_undef_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
- ret <vscale x 2 x i32> %v
-}
-
-define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
- ret <vscale x 2 x i32> %v
-}
-
-
-define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
- ret <vscale x 4 x i32> %v
+ %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
}
-define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v12, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v12
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a1, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a1
+; CHECK-NEXT: li a1, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked:
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
- ret <vscale x 4 x i32> %v
-}
-
-
-define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
- ret <vscale x 8 x i32> %v
-}
-
-define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
- ret <vscale x 8 x i32> %v
-}
-
-
-define <vscale x 16 x i32> @vp_cttz_zero_undef_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
- ret <vscale x 16 x i32> %v
-}
-
-define <vscale x 16 x i32> @vp_cttz_zero_undef_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
- %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
- ret <vscale x 16 x i32> %v
-}
-
-
-define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv1i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v9, v9, v10, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v10, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv1i64:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
- ret <vscale x 1 x i64> %v
-}
-
-define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v9, v9, v10
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v10, v8, v9
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v9
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
- ret <vscale x 1 x i64> %v
-}
-
-
-define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv2i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v10, v10, v12, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v12, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
- ret <vscale x 2 x i64> %v
-}
-
-define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v10, v10, v12
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v12, v8, v10
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v10
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
- ret <vscale x 2 x i64> %v
-}
-
-
-define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv4i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v12, v12, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v16, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv4i64:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
- ret <vscale x 4 x i64> %v
-}
-
-define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v12, v12, v16
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v16, v8, v12
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v12
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
- ret <vscale x 4 x i64> %v
-}
-
-
-define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv7i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv7i64:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
- ret <vscale x 7 x i64> %v
-}
-
-define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
- %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
- ret <vscale x 7 x i64> %v
-}
-
-
-define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT: ret
- %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
- ret <vscale x 8 x i64> %v
-}
-
-define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
- ret <vscale x 8 x i64> %v
-}
-
-define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv16i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 40
-; RV32-NEXT: mul a1, a1, a2
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb
-; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: srli a2, a1, 3
-; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a2
-; RV32-NEXT: lui a2, 349525
-; RV32-NEXT: addi a2, a2, 1365
-; RV32-NEXT: sw a2, 44(sp)
-; RV32-NEXT: sw a2, 40(sp)
-; RV32-NEXT: lui a2, 209715
-; RV32-NEXT: addi a2, a2, 819
-; RV32-NEXT: sw a2, 36(sp)
-; RV32-NEXT: sw a2, 32(sp)
-; RV32-NEXT: lui a2, 61681
-; RV32-NEXT: addi a2, a2, -241
-; RV32-NEXT: sw a2, 28(sp)
-; RV32-NEXT: sw a2, 24(sp)
-; RV32-NEXT: lui a2, 4112
-; RV32-NEXT: addi a2, a2, 257
-; RV32-NEXT: sw a2, 20(sp)
-; RV32-NEXT: sw a2, 16(sp)
-; RV32-NEXT: sub a2, a0, a1
-; RV32-NEXT: sltu a3, a0, a2
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: li a2, 1
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v24, v16, a2, v0.t
-; RV32-NEXT: vnot.v v16, v16, v0.t
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: addi a4, sp, 40
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vsub.vv v16, v16, v24, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: addi a4, sp, 32
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vadd.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t
-; RV32-NEXT: vadd.vv v24, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 24
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vmul.vv v16, v24, v16, v0.t
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: vsrl.vx v16, v16, a3, v0.t
-; RV32-NEXT: addi a4, sp, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: bltu a0, a1, .LBB94_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB94_2:
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmv1r.v v0, v1
-; RV32-NEXT: vsub.vx v16, v8, a2, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 40
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv16i64:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
-; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: srli a2, a1, 3
-; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a2
-; RV64-NEXT: sub a2, a0, a1
-; RV64-NEXT: sltu a3, a0, a2
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: and a3, a3, a2
-; RV64-NEXT: li a2, 1
-; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v8, v16, a2, v0.t
-; RV64-NEXT: vnot.v v16, v16, v0.t
-; RV64-NEXT: vand.vv v16, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT: lui a3, 349525
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: vand.vx v8, v8, a3, v0.t
-; RV64-NEXT: vsub.vv v16, v16, v8, v0.t
-; RV64-NEXT: lui a4, 209715
-; RV64-NEXT: addiw a4, a4, 819
-; RV64-NEXT: slli a5, a4, 32
-; RV64-NEXT: add a4, a4, a5
-; RV64-NEXT: vand.vx v8, v16, a4, v0.t
-; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t
-; RV64-NEXT: vand.vx v16, v16, a4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a5, 61681
-; RV64-NEXT: addiw a5, a5, -241
-; RV64-NEXT: slli a6, a5, 32
-; RV64-NEXT: add a5, a5, a6
-; RV64-NEXT: vand.vx v8, v8, a5, v0.t
-; RV64-NEXT: lui a6, 4112
-; RV64-NEXT: addiw a6, a6, 257
-; RV64-NEXT: slli a7, a6, 32
-; RV64-NEXT: add a6, a6, a7
-; RV64-NEXT: vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT: li a7, 56
-; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT: addi t0, sp, 16
-; RV64-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill
-; RV64-NEXT: bltu a0, a1, .LBB94_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB94_2:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vmv1r.v v0, v24
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vsub.vx v16, v8, a2, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vand.vx v16, v16, a3, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: vand.vx v16, v8, a4, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a4, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: vand.vx v8, v8, a5, v0.t
-; RV64-NEXT: vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
-; RV64-NEXT: add sp, sp, a0
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+
+define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv7i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a1, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t
+; CHECK-NEXT: li a1, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
+ ret <vscale x 7 x i64> %v
+}
+
+define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v16, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v16
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a1, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a1
+; CHECK-NEXT: li a1, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
+ %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
+ ret <vscale x 7 x i64> %v
+}
+
+
+define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a1, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t
+; CHECK-NEXT: li a1, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v16, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v16
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a1, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a1
+; CHECK-NEXT: li a1, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 3
+; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v16, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v16, v8, v0.t
+; CHECK-NEXT: fsrmi a2, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: li a2, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t
+; CHECK-NEXT: li a3, 1023
+; CHECK-NEXT: vsub.vx v8, v8, a3, v0.t
+; CHECK-NEXT: addi a4, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a0, a1, .LBB94_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB94_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t
+; CHECK-NEXT: vsub.vx v8, v8, a3, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64:
; CHECK-ZVBB: # %bb.0:
@@ -6904,193 +4949,36 @@ define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64(<vscale x 16 x i64> %va,
}
define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
-; RV32-NEXT: sw a1, 40(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
-; RV32-NEXT: sw a1, 32(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: sub a2, a0, a1
-; RV32-NEXT: sltu a3, a0, a2
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: li a2, 1
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v24, v16, a2
-; RV32-NEXT: vnot.v v16, v16
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 1
-; RV32-NEXT: addi a4, sp, 40
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v0
-; RV32-NEXT: vsub.vv v16, v16, v24
-; RV32-NEXT: addi a4, sp, 32
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v16, v0
-; RV32-NEXT: vsrl.vi v16, v16, 2
-; RV32-NEXT: vand.vv v16, v16, v0
-; RV32-NEXT: vadd.vv v16, v24, v16
-; RV32-NEXT: vsrl.vi v24, v16, 4
-; RV32-NEXT: vadd.vv v16, v16, v24
-; RV32-NEXT: addi a4, sp, 24
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v16, v24
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vmul.vv v24, v24, v16
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: vsrl.vx v16, v24, a3
-; RV32-NEXT: addi a4, sp, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: bltu a0, a1, .LBB95_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB95_2:
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v24, v8, a2
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v24, v16
-; RV32-NEXT: vsub.vv v8, v8, v24
-; RV32-NEXT: vand.vv v24, v8, v0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v0
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v24, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v24
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: vsrl.vx v8, v8, a3
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: sub a2, a0, a1
-; RV64-NEXT: sltu a3, a0, a2
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: and a3, a3, a2
-; RV64-NEXT: li a2, 1
-; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v24, v16, a2
-; RV64-NEXT: vnot.v v16, v16
-; RV64-NEXT: vand.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: lui a3, 349525
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vsub.vv v16, v16, v24
-; RV64-NEXT: lui a4, 209715
-; RV64-NEXT: addiw a4, a4, 819
-; RV64-NEXT: slli a5, a4, 32
-; RV64-NEXT: add a4, a4, a5
-; RV64-NEXT: vand.vx v24, v16, a4
-; RV64-NEXT: vsrl.vi v16, v16, 2
-; RV64-NEXT: vand.vx v16, v16, a4
-; RV64-NEXT: vadd.vv v16, v24, v16
-; RV64-NEXT: vsrl.vi v24, v16, 4
-; RV64-NEXT: vadd.vv v16, v16, v24
-; RV64-NEXT: lui a5, 61681
-; RV64-NEXT: addiw a5, a5, -241
-; RV64-NEXT: slli a6, a5, 32
-; RV64-NEXT: add a5, a5, a6
-; RV64-NEXT: vand.vx v16, v16, a5
-; RV64-NEXT: lui a6, 4112
-; RV64-NEXT: addiw a6, a6, 257
-; RV64-NEXT: slli a7, a6, 32
-; RV64-NEXT: add a6, a6, a7
-; RV64-NEXT: vmul.vx v16, v16, a6
-; RV64-NEXT: li a7, 56
-; RV64-NEXT: vsrl.vx v16, v16, a7
-; RV64-NEXT: bltu a0, a1, .LBB95_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB95_2:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v24, v8, a2
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vsub.vv v8, v8, v24
-; RV64-NEXT: vand.vx v24, v8, a4
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a4
-; RV64-NEXT: vadd.vv v8, v24, v8
-; RV64-NEXT: vsrl.vi v24, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v24
-; RV64-NEXT: vand.vx v8, v8, a5
-; RV64-NEXT: vmul.vx v8, v8, a6
-; RV64-NEXT: vsrl.vx v8, v8, a7
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v24, v16, 0
+; CHECK-NEXT: vand.vv v16, v16, v24
+; CHECK-NEXT: fsrmi a2, 1
+; CHECK-NEXT: vfcvt.f.xu.v v16, v16
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: li a2, 52
+; CHECK-NEXT: vsrl.vx v16, v16, a2
+; CHECK-NEXT: li a3, 1023
+; CHECK-NEXT: vsub.vx v16, v16, a3
+; CHECK-NEXT: bltu a0, a1, .LBB95_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB95_2:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v24, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v24
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: vsrl.vx v8, v8, a2
+; CHECK-NEXT: vsub.vx v8, v8, a3
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
; CHECK-ZVBB: # %bb.0:
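
For readers skimming the new CHECK lines: the lowering they exercise isolates the lowest set bit (vrsub.vi + vand.vv), converts it to f64 (vfcvt.f.xu.v), then reads off the biased exponent (vsrl.vx by 52, vsub.vx by 1023). Below is a minimal scalar sketch of that per-element computation; it is not part of the commit, and the helper name cttz_via_f64 is made up for illustration.

#include <cassert>
#include <cstdint>
#include <cstring>

// Scalar model of the per-element computation shown in the vector CHECK lines.
static uint64_t cttz_via_f64(uint64_t x) {
  assert(x != 0 && "zero input is poison for cttz_zero_undef");
  uint64_t lsb = x & (0 - x);            // vrsub.vi + vand.vv: keep only the lowest set bit
  double d = static_cast<double>(lsb);   // vfcvt.f.xu.v: a power of two converts exactly
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof(bits));  // reuse the IEEE-754 encoding of the result
  return (bits >> 52) - 1023;            // vsrl.vx 52 + vsub.vx 1023: unbias the exponent
}

The fsrmi a0, 1 / fsrm a0 pair in the checks switches the dynamic FP rounding mode (frm) to round-toward-zero around the conversion and then restores it; for the isolated power of two the conversion is exact under any rounding mode.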