[llvm] [RISCV] Remove codegen for vp_ctlz, vp_cttz, vp_ctpop (PR #189904)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 1 00:59:26 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
Part of the work to remove trivial VP intrinsics from the RISC-V backend; see the RFC at https://discourse.llvm.org/t/rfc-remove-codegen-support-for-trivial-vp-intrinsics-in-the-risc-v-backend/87999
This splits off 3 intrinsics from #179622.
Note that vp.cttz is the elementwise version, not vp.cttz.elts.
---
Patch is 932.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/189904.diff
8 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+14-71)
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll (+762-962)
- (modified) llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll (+1061-1192)
- (modified) llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll (+994-2350)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll (+657-4246)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll (+869-937)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll (+850-3299)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index cb6489fbeae09..a101d669fbd0b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1101,24 +1101,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtZvbb()) {
setOperationAction(ISD::BITREVERSE, VT, Legal);
setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
- setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
- ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
- VT, Custom);
} else {
setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
- setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
- ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
- VT, Expand);
// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
// range of f32.
EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
if (isTypeLegal(FloatVT)) {
- setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
- ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
- ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
- VT, Custom);
+ setOperationAction(
+ {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+ Custom);
}
}
@@ -6895,15 +6888,6 @@ RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
SDLoc DL(Op);
MVT ContainerVT = VT;
- SDValue Mask, VL;
- if (Op->isVPOpcode()) {
- Mask = Op.getOperand(1);
- if (VT.isFixedLengthVector())
- Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
- Subtarget);
- VL = Op.getOperand(2);
- }
-
// We choose FP type that can represent the value if possible. Otherwise, we
// use rounding to zero conversion for correct exponent of the result.
// TODO: Use f16 for i8 when possible?
@@ -6923,27 +6907,19 @@ RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
SDValue Neg = DAG.getNegative(Src, DL, VT);
Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
- } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
- SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
- Src, Mask, VL);
- Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
}
// We have a legal FP type, convert to it.
SDValue FloatVal;
if (FloatVT.bitsGT(VT)) {
- if (Op->isVPOpcode())
- FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
- else
- FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
+ FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
} else {
// Use RTZ to avoid rounding influencing exponent of FloatVal.
if (VT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
}
- if (!Op->isVPOpcode())
- std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
SDValue RTZRM =
DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
MVT ContainerFloatVT =
@@ -6958,20 +6934,13 @@ RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
- SDValue Exp;
// Restore back to original type. Truncation after SRL is to generate vnsrl.
- if (Op->isVPOpcode()) {
- Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
- DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
- Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
- } else {
- Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
- DAG.getConstant(ShiftAmt, DL, IntVT));
- if (IntVT.bitsLT(VT))
- Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
- else if (IntVT.bitsGT(VT))
- Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
- }
+ SDValue Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
+ DAG.getConstant(ShiftAmt, DL, IntVT));
+ if (IntVT.bitsLT(VT))
+ Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
+ else if (IntVT.bitsGT(VT))
+ Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
// The exponent contains log2 of the value in biased form.
unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
@@ -6979,27 +6948,18 @@ RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
return DAG.getNode(ISD::SUB, DL, VT, Exp,
DAG.getConstant(ExponentBias, DL, VT));
- if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
- return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
- DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
// For leading zeros, we need to remove the bias and convert from log2 to
// leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
unsigned Adjust = ExponentBias + (EltSize - 1);
- SDValue Res;
- if (Op->isVPOpcode())
- Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
- Mask, VL);
- else
- Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
+ SDValue Res =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
// The above result with zero input equals to Adjust which is greater than
// EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
if (Op.getOpcode() == ISD::CTLZ)
Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
- else if (Op.getOpcode() == ISD::VP_CTLZ)
- Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
- DAG.getConstant(EltSize, DL, VT), Mask, VL);
+
return Res;
}
@@ -7648,14 +7608,9 @@ static unsigned getRISCVVLOp(SDValue Op) {
VP_CASE(SSUBSAT) // VP_SSUBSAT
VP_CASE(USUBSAT) // VP_USUBSAT
VP_CASE(BSWAP) // VP_BSWAP
- VP_CASE(CTLZ) // VP_CTLZ
- VP_CASE(CTTZ) // VP_CTTZ
- VP_CASE(CTPOP) // VP_CTPOP
case ISD::CTLZ_ZERO_UNDEF:
- case ISD::VP_CTLZ_ZERO_UNDEF:
return RISCVISD::CTLZ_VL;
case ISD::CTTZ_ZERO_UNDEF:
- case ISD::VP_CTTZ_ZERO_UNDEF:
return RISCVISD::CTTZ_VL;
case ISD::FMA:
case ISD::VP_FMA:
@@ -9136,18 +9091,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_BITREVERSE:
case ISD::VP_BSWAP:
return lowerVPOp(Op, DAG);
- case ISD::VP_CTLZ:
- case ISD::VP_CTLZ_ZERO_UNDEF:
- if (Subtarget.hasStdExtZvbb())
- return lowerVPOp(Op, DAG);
- return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
- case ISD::VP_CTTZ:
- case ISD::VP_CTTZ_ZERO_UNDEF:
- if (Subtarget.hasStdExtZvbb())
- return lowerVPOp(Op, DAG);
- return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
- case ISD::VP_CTPOP:
- return lowerVPOp(Op, DAG);
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
return lowerVPStridedLoad(Op, DAG);
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 18e0ab9b72608..e9ce271bdc6eb 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -377,9 +377,6 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
Intrinsic::vp_bswap,
Intrinsic::vp_ceil,
Intrinsic::vp_copysign,
- Intrinsic::vp_ctlz,
- Intrinsic::vp_ctpop,
- Intrinsic::vp_cttz,
Intrinsic::vp_cttz_elts,
Intrinsic::vp_fabs,
Intrinsic::vp_fadd,
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
index 2ed155a491657..4c82c8abd4edd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -11,25 +11,22 @@
define <vscale x 1 x i8> @vp_ctlz_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
-; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: li a0, 8
-; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i8:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i8> %v
@@ -38,7 +35,7 @@ define <vscale x 1 x i8> @vp_ctlz_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1
define <vscale x 1 x i8> @vp_ctlz_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv1i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vzext.vf2 v9, v8
; CHECK-NEXT: li a0, 134
; CHECK-NEXT: vfwcvt.f.xu.v v8, v9
@@ -52,7 +49,7 @@ define <vscale x 1 x i8> @vp_ctlz_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zer
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i8_unmasked:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 false, <vscale x 1 x i1> splat (i1 true), i32 %evl)
@@ -62,25 +59,22 @@ define <vscale x 1 x i8> @vp_ctlz_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zer
define <vscale x 2 x i8> @vp_ctlz_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
-; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: li a0, 8
-; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i8:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i8> %v
@@ -89,7 +83,7 @@ define <vscale x 2 x i8> @vp_ctlz_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1
define <vscale x 2 x i8> @vp_ctlz_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv2i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vzext.vf2 v9, v8
; CHECK-NEXT: li a0, 134
; CHECK-NEXT: vfwcvt.f.xu.v v8, v9
@@ -103,7 +97,7 @@ define <vscale x 2 x i8> @vp_ctlz_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zer
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i8_unmasked:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 false, <vscale x 2 x i1> splat (i1 true), i32 %evl)
@@ -113,25 +107,22 @@ define <vscale x 2 x i8> @vp_ctlz_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zer
define <vscale x 4 x i8> @vp_ctlz_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v8
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT: vnsrl.wi v10, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t
-; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: li a0, 8
-; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i8:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i8> %v
@@ -140,7 +131,7 @@ define <vscale x 4 x i8> @vp_ctlz_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1
define <vscale x 4 x i8> @vp_ctlz_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv4i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
; CHECK-NEXT: li a0, 134
; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
@@ -154,7 +145,7 @@ define <vscale x 4 x i8> @vp_ctlz_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zer
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i8_unmasked:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 false, <vscale x 4 x i1> splat (i1 true), i32 %evl)
@@ -164,25 +155,22 @@ define <vscale x 4 x i8> @vp_ctlz_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zer
define <vscale x 8 x i8> @vp_ctlz_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v12, v8
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v8, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT: vnsrl.wi v12, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0, v0.t
-; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: li a0, 8
-; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i8:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
@@ -191,7 +179,7 @@ define <vscale x 8 x i8> @vp_ctlz_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1
define <vscale x 8 x i8> @vp_ctlz_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv8i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v12, v8
; CHECK-NEXT: li a0, 134
; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
@@ -205,7 +193,7 @@ define <vscale x 8 x i8> @vp_ctlz_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zer
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i8_unmasked:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 false, <vscale x 8 x i1> splat (i1 true), i32 %evl)
@@ -215,25 +203,22 @@ define <vscale x 8 x i8> @vp_ctlz_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zer
define <vscale x 16 x i8> @vp_ctlz_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv16i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v16, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v16, v8
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT: vnsrl.wi v16, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
-; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: li a0, 8
-; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i8:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x i8> %v
@@ -242,7 +227,7 @@ define <vscale x 16 x i8> @vp_ctlz_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16
define <vscale x 16 x i8> @vp_ctlz_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_nxv16i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vzext.vf2 v16, v8
; CHECK-NEXT: li a0, 134
; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
@...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/189904
More information about the llvm-commits
mailing list