[llvm] 5b9af38 - [RISCV] Provide a more efficient lowering for experimental.cttz.elts. (#88552)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 15 18:38:57 PDT 2024
Author: Craig Topper
Date: 2024-04-15T18:38:54-07:00
New Revision: 5b9af38a03c8119cc2a42ae80d4a25e6f454c721
URL: https://github.com/llvm/llvm-project/commit/5b9af38a03c8119cc2a42ae80d4a25e6f454c721
DIFF: https://github.com/llvm/llvm-project/commit/5b9af38a03c8119cc2a42ae80d4a25e6f454c721.diff
LOG: [RISCV] Provide a more efficient lowering for experimental.cttz.elts. (#88552)
For experimental.cttz.elts, we can use a vfirst instruction, but we need
to correct the result when the input vector can be all zeros: cttz.elts
returns the vector length, while vfirst returns -1.
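As a rough scalar sketch of that correction (illustrative only, with assumed
helper and parameter names; the actual lowering builds a RISCVISD::VFIRST_VL
node, a SETLT compare against zero, and a select in SelectionDAG):

#include <cstdint>

// Scalar model of the lowering: `vfirstResult` stands in for the value
// produced by vfirst.m, `vl` for the vector length of the mask operand,
// and `zeroIsPoison` for the intrinsic's second argument.
int64_t cttzEltsResult(int64_t vfirstResult, int64_t vl, bool zeroIsPoison) {
  // When zero is poison, the vfirst result can be returned unchanged.
  if (zeroIsPoison)
    return vfirstResult;
  // Otherwise an all-zero mask makes vfirst return -1, but cttz.elts
  // must return the vector length instead.
  return vfirstResult < 0 ? vl : vfirstResult;
}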
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 259cc388276c69..f6ed6420c9e1fa 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1484,6 +1484,11 @@ bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
return VF > MaxVF || !isPowerOf2_32(VF);
}
+bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
+ return !Subtarget.hasVInstructions() ||
+ VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
+}
+
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
@@ -8718,6 +8723,29 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
}
+static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDValue Op0 = N->getOperand(1);
+ MVT OpVT = Op0.getSimpleValueType();
+ MVT ContainerVT = OpVT;
+ if (OpVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
+ Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
+ }
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDLoc DL(N);
+ auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
+ SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
+ if (isOneConstant(N->getOperand(2)))
+ return Res;
+
+ // Convert -1 to VL.
+ SDValue Setcc =
+ DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
+ VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
+ return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
+}
+
static inline void promoteVCIXScalar(const SDValue &Op,
SmallVectorImpl<SDValue> &Operands,
SelectionDAG &DAG) {
@@ -8913,6 +8941,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::experimental_get_vector_length:
return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
+ case Intrinsic::experimental_cttz_elts:
+ return lowerCttzElts(Op.getNode(), DAG, Subtarget);
case Intrinsic::riscv_vmv_x_s: {
SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
@@ -12336,6 +12366,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
+ case Intrinsic::experimental_cttz_elts: {
+ SDValue Res = lowerCttzElts(N, DAG, Subtarget);
+ Results.push_back(
+ DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
+ return;
+ }
case Intrinsic::riscv_orc_b:
case Intrinsic::riscv_brev8:
case Intrinsic::riscv_sha256sig0:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index ace5b3fd2b95b4..e2633733c31b19 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -986,6 +986,8 @@ class RISCVTargetLowering : public TargetLowering {
bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
bool IsScalable) const override;
+ bool shouldExpandCttzElements(EVT VT) const override;
+
/// RVV code generation for fixed length vectors does not lower all
/// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
/// merge. However, merging them creates a BUILD_VECTOR that is just as
diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
index 65d0768c60885d..ea8feef3329840 100644
--- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
+++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
@@ -128,43 +128,113 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
define i32 @ctz_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; RV32-LABEL: ctz_nxv16i1:
; RV32: # %bb.0:
-; RV32-NEXT: vmv1r.v v0, v8
+; RV32-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; RV32-NEXT: vfirst.m a0, v8
+; RV32-NEXT: bgez a0, .LBB2_2
+; RV32-NEXT: # %bb.1:
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vid.v v16
-; RV32-NEXT: li a1, -1
-; RV32-NEXT: vmadd.vx v16, a1, v8
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
-; RV32-NEXT: vredmaxu.vs v8, v8, v8
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: .LBB2_2:
; RV32-NEXT: ret
;
; RV64-LABEL: ctz_nxv16i1:
; RV64: # %bb.0:
-; RV64-NEXT: vmv1r.v v0, v8
+; RV64-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; RV64-NEXT: vfirst.m a0, v8
+; RV64-NEXT: bgez a0, .LBB2_2
+; RV64-NEXT: # %bb.1:
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vid.v v16
-; RV64-NEXT: li a1, -1
-; RV64-NEXT: vmadd.vx v16, a1, v8
-; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
-; RV64-NEXT: vredmaxu.vs v8, v8, v8
-; RV64-NEXT: vmv.x.s a1, v8
-; RV64-NEXT: subw a0, a0, a1
+; RV64-NEXT: .LBB2_2:
; RV64-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 0)
ret i32 %res
}
+define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
+; RV32-LABEL: ctz_nxv16i1_poison:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; RV32-NEXT: vfirst.m a0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ctz_nxv16i1_poison:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; RV64-NEXT: vfirst.m a0, v8
+; RV64-NEXT: ret
+ %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 1)
+ ret i32 %res
+}
+
+define i32 @ctz_v16i1(<16 x i1> %pg, <16 x i1> %a) {
+; RV32-LABEL: ctz_v16i1:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vfirst.m a0, v8
+; RV32-NEXT: bgez a0, .LBB4_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a0, 16
+; RV32-NEXT: .LBB4_2:
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ctz_v16i1:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vfirst.m a0, v8
+; RV64-NEXT: bgez a0, .LBB4_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a0, 16
+; RV64-NEXT: .LBB4_2:
+; RV64-NEXT: ret
+ %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
+ ret i32 %res
+}
+
+define i32 @ctz_v16i1_poison(<16 x i1> %pg, <16 x i1> %a) {
+; RV32-LABEL: ctz_v16i1_poison:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vfirst.m a0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ctz_v16i1_poison:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vfirst.m a0, v8
+; RV64-NEXT: ret
+ %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 1)
+ ret i32 %res
+}
+
+define i16 @ctz_v8i1_i16_ret(<8 x i1> %a) {
+; RV32-LABEL: ctz_v8i1_i16_ret:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vfirst.m a0, v0
+; RV32-NEXT: bgez a0, .LBB6_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a0, 8
+; RV32-NEXT: .LBB6_2:
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ctz_v8i1_i16_ret:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vfirst.m a0, v0
+; RV64-NEXT: bgez a0, .LBB6_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a0, 8
+; RV64-NEXT: .LBB6_2:
+; RV64-NEXT: ret
+ %res = call i16 @llvm.experimental.cttz.elts.i16.v8i1(<8 x i1> %a, i1 0)
+ ret i16 %res
+}
+
declare i64 @llvm.experimental.cttz.elts.i64.nxv8i16(<vscale x 8 x i16>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.nxv4i32(<vscale x 4 x i32>, i1)
+declare i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1>, i1)
+declare i16 @llvm.experimental.cttz.elts.i16.v8i1(<8 x i1>, i1)
attributes #0 = { vscale_range(2,1024) }
diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll
index 49d4760a2e9abf..94b717b42e92b6 100644
--- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll
+++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll
@@ -48,31 +48,13 @@ define i32 @ctz_v2i1_poison(<2 x i1> %a) {
; RV32-LABEL: ctz_v2i1_poison:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vmerge.vim v8, v8, -1, v0
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vrsub.vi v9, v9, 2
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vredmaxu.vs v8, v8, v8
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: li a1, 2
-; RV32-NEXT: sub a1, a1, a0
-; RV32-NEXT: andi a0, a1, 255
+; RV32-NEXT: vfirst.m a0, v0
; RV32-NEXT: ret
;
; RV64-LABEL: ctz_v2i1_poison:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vmerge.vim v8, v8, -1, v0
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vrsub.vi v9, v9, 2
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vredmaxu.vs v8, v8, v8
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: li a1, 2
-; RV64-NEXT: subw a1, a1, a0
-; RV64-NEXT: andi a0, a1, 255
+; RV64-NEXT: vfirst.m a0, v0
; RV64-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 1)
ret i32 %res