[llvm] [ISel/RISCV] Custom-lower vector [l]lround (PR #147713)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 9 05:32:42 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Ramkumar Ramachandra (artagnon)
Changes:
Lower vector [l]lround just like vector [l]lrint, using vfcvt with the RMM rounding mode (round to nearest, ties away from zero). Updating the costs to account for this custom lowering is left to a companion patch.
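For illustration, the lowering reuses the vfcvt path with the dynamic rounding mode temporarily switched to RMM via fsrmi/fsrm, instead of scalarizing through libcalls or scalar fcvt. The sketch below pairs a small llround case with the RV64 output taken from the updated fixed-vectors-llround.ll test in this patch:

```llvm
; Minimal example: vector llround on a <1 x half> source.
define <1 x i64> @llround_v1f16(<1 x half> %x) nounwind {
  %a = call <1 x i64> @llvm.llround.v1i64.v1f16(<1 x half> %x)
  ret <1 x i64> %a
}
; RV64 codegen from the updated test below: widen f16 to f32, switch the
; dynamic rounding mode to RMM (fsrmi 4), then do a single widening
; float-to-integer conversion, restoring the rounding mode afterwards.
;   vsetivli     zero, 1, e16, mf4, ta, ma
;   vfwcvt.f.f.v v9, v8
;   fsrmi        a0, 4
;   vsetvli      zero, zero, e32, mf2, ta, ma
;   vfwcvt.x.f.v v8, v9
;   fsrm         a0
;   ret
```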
---
Patch is 184.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147713.diff
5 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+15-7)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llround.ll (+158-1455)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lround.ll (+261-1291)
- (added) llvm/test/CodeGen/RISCV/rvv/llround-sdnode.ll (+282)
- (added) llvm/test/CodeGen/RISCV/rvv/lround-sdnode.ll (+759)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index dcb4f690ba35c..db709063b1977 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1070,6 +1070,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// vXf32.
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
+ setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
Custom);
@@ -1151,6 +1152,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Custom);
setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
+ setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -1453,6 +1455,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
Custom);
setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
+ setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
if (Subtarget.hasStdExtZfhmin()) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
} else {
@@ -1478,6 +1481,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
+ setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
if (Subtarget.hasStdExtZfbfmin()) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
} else {
@@ -1511,7 +1515,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FRINT, ISD::LRINT,
- ISD::LLRINT, ISD::FNEARBYINT},
+ ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
+ ISD::FNEARBYINT},
VT, Custom);
setCondCodeAction(VFPCCToExpand, VT, Expand);
@@ -3211,7 +3216,11 @@ static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
case ISD::VP_FCEIL:
return RISCVFPRndMode::RUP;
case ISD::FROUND:
+ case ISD::LROUND:
+ case ISD::LLROUND:
case ISD::STRICT_FROUND:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
case ISD::VP_FROUND:
return RISCVFPRndMode::RMM;
case ISD::FRINT:
@@ -3469,9 +3478,9 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
}
-// Expand vector LRINT and LLRINT by converting to the integer domain.
-static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
+static SDValue lowerVectorXRINT_XROUND(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
SDLoc DL(Op);
MVT DstVT = Op.getSimpleValueType();
SDValue Src = Op.getOperand(0);
@@ -7711,11 +7720,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
case ISD::LRINT:
case ISD::LLRINT:
- if (Op.getValueType().isVector())
- return lowerVectorXRINT(Op, DAG, Subtarget);
- [[fallthrough]];
case ISD::LROUND:
case ISD::LLROUND: {
+ if (Op.getValueType().isVector())
+ return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
assert(Op.getOperand(0).getValueType() == MVT::f16 &&
"Unexpected custom legalisation");
SDLoc DL(Op);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llround.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llround.ll
index b8ca7fd71cb93..5751759ddd9cb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llround.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llround.ll
@@ -1,35 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d,+zvfh -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d,+zvfhmin,+zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d,+zvfh -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d,+zvfhmin,+zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
define <1 x i64> @llround_v1f16(<1 x half> %x) nounwind {
; RV32-LABEL: llround_v1f16:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.s.h fa0, fa5
-; RV32-NEXT: call llroundf
-; RV32-NEXT: sw a0, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32-NEXT: vfwcvt.f.f.v v9, v8
+; RV32-NEXT: fsrmi a0, 4
+; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-NEXT: vfwcvt.x.f.v v8, v9
+; RV32-NEXT: fsrm a0
; RV32-NEXT: ret
;
; RV64-LABEL: llround_v1f16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.s.h fa5, fa5
-; RV64-NEXT: fcvt.l.s a0, fa5, rmm
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
+; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64-NEXT: vfwcvt.f.f.v v9, v8
+; RV64-NEXT: fsrmi a0, 4
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-NEXT: vfwcvt.x.f.v v8, v9
+; RV64-NEXT: fsrm a0
; RV64-NEXT: ret
%a = call <1 x i64> @llvm.llround.v1i64.v1f16(<1 x half> %x)
ret <1 x i64> %a
@@ -39,58 +32,22 @@ declare <1 x i64> @llvm.llround.v1i64.v1f16(<1 x half>)
define <2 x i64> @llround_v2f16(<2 x half> %x) nounwind {
; RV32-LABEL: llround_v2f16:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.s.h fa0, fa5
-; RV32-NEXT: call llroundf
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.s.h fa0, fa5
-; RV32-NEXT: call llroundf
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: add a2, sp, a2
-; RV32-NEXT: addi a2, a2, 16
-; RV32-NEXT: vl1r.v v8, (a2) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT: vfwcvt.f.f.v v9, v8
+; RV32-NEXT: fsrmi a0, 4
+; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-NEXT: vfwcvt.x.f.v v8, v9
+; RV32-NEXT: fsrm a0
; RV32-NEXT: ret
;
; RV64-LABEL: llround_v2f16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.s.h fa5, fa5
-; RV64-NEXT: fcvt.l.s a0, fa5, rmm
-; RV64-NEXT: vfmv.f.s fa5, v9
-; RV64-NEXT: fcvt.s.h fa5, fa5
-; RV64-NEXT: fcvt.l.s a1, fa5, rmm
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT: vfwcvt.f.f.v v9, v8
+; RV64-NEXT: fsrmi a0, 4
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-NEXT: vfwcvt.x.f.v v8, v9
+; RV64-NEXT: fsrm a0
; RV64-NEXT: ret
%a = call <2 x i64> @llvm.llround.v2i64.v2f16(<2 x half> %x)
ret <2 x i64> %a
@@ -100,108 +57,22 @@ declare <2 x i64> @llvm.llround.v2i64.v2f16(<2 x half>)
define <3 x i64> @llround_v3f16(<3 x half> %x) nounwind {
; RV32-LABEL: llround_v3f16:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a1, a0, 1
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.s.h fa0, fa5
-; RV32-NEXT: call llroundf
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.s.h fa0, fa5
-; RV32-NEXT: call llroundf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 2
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.s.h fa0, fa5
-; RV32-NEXT: call llroundf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.s.h fa0, fa5
-; RV32-NEXT: call llroundf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a1, a0, 1
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vfwcvt.f.f.v v10, v8
+; RV32-NEXT: fsrmi a0, 4
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vfwcvt.x.f.v v8, v10
+; RV32-NEXT: fsrm a0
; RV32-NEXT: ret
;
; RV64-LABEL: llround_v3f16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vslidedown.vi v11, v8, 3
-; RV64-NEXT: fcvt.s.h fa5, fa5
-; RV64-NEXT: fcvt.l.s a0, fa5, rmm
-; RV64-NEXT: vfmv.f.s fa5, v9
-; RV64-NEXT: fcvt.s.h fa5, fa5
-; RV64-NEXT: fcvt.l.s a1, fa5, rmm
-; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.s.h fa5, fa5
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: fcvt.l.s a0, fa5, rmm
-; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v11
-; RV64-NEXT: fcvt.s.h fa5, fa5
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: fcvt.l.s a0, fa5, rmm
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vfwcvt.f.f.v v10, v8
+; RV64-NEXT: fsrmi a0, 4
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64-NEXT: vfwcvt.x.f.v v8, v10
+; RV64-NEXT: fsrm a0
; RV64-NEXT: ret
%a = call <3 x i64> @llvm.llround.v3i64.v3f16(<3 x half> %x)
ret <3 x i64> %a
@@ -211,108 +82,22 @@ declare <3 x i64> @llvm.llround.v3i64.v3f16(<3 x half>)
define <4 x i64> @llround_v4f16(<4 x half> %x) nounwind {
; RV32-LABEL: llround_v4f16:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a1, a0, 1
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.s.h fa0, fa5
-; RV32-NEXT: call llroundf
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.s.h fa0, fa5
-; RV32-NEXT: call llroundf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 2
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.s.h fa0, fa5
-; RV32-NEXT: call llroundf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.s.h fa0, fa5
-; RV32-NEXT: call llroundf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a1, a0, 1
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vfwcvt.f.f.v v10, v8
+; RV32-NEXT: fsrmi a0, 4
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vfwcvt.x.f.v v8, v10
+; RV32-NEXT: fsrm a0
; RV32-NEXT: ret
;
; RV64-LABEL: llround_v4f16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vslidedown.vi v11, v8, 3
-; RV64-NEXT: fcvt.s.h fa5, fa5
-; RV64-NEXT: fcvt.l.s a0, fa5, rmm
-; RV64-NEXT: vfmv.f.s fa5, v9
-; RV64-NEXT: fcvt.s.h fa5, fa5
-; RV64-NEXT: fcvt.l.s a1, fa5, rmm
-; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.s.h fa5, fa5
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: fcvt.l.s a0, fa5, rmm
-; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v11
-; RV64-NEXT: fcvt.s.h fa5, fa5
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: fcvt.l.s a0, fa5, rmm
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vfwcvt.f.f.v v10, v8
+; RV64-NEXT: fsrmi a0, 4
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64-NEXT: vfwcvt.x.f.v v8, v10
+; RV64-NEXT: fsrm a0
; RV64-NEXT: ret
%a = call <4 x i64> @llvm.llround.v4i64.v4f16(<4 x half> %x)
ret <4 x i64> %a
@@ -322,147 +107,22 @@ declare <4 x i64> @llvm.llround.v4i64.v4f16(<4 x half>)
define <8 x i64> @llround_v8f16(<8 x half> %x) nounwind {
; RV32-LABEL: llround_v8f16:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -208
-; RV32-NEXT: sw ra, 204(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 200(sp) # 4-byte Folded Spill
-; RV32-NEXT: addi s0, sp, 208
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.s.h fa0, fa5
-; RV32-NEXT: call llroundf
-; RV32-NEXT: sw a0, 64(sp)
-; RV32-NEXT: sw a1, 68(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.s.h fa0, fa5
-; RV32-NEXT: call llroundf
-; RV32-NEXT: sw a0, 120(sp)
-; RV32-NEXT: sw a1, 124(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 6
-; ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/147713