[llvm] [ISel/RISCV] Fix fixed-vector [l]lrint lowering (PR #145898)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 26 07:17:54 PDT 2025
https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/145898
Make the fixed-vector lowering of ISD::[L]LRINT use the custom-lowering routine lowerVectorXRINT, and fix lowerVectorXRINT for this new use: derive the source and destination container types separately (the source and result types differ), and take the rounding mode from matchRoundingOp rather than hard-coding DYN.
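For reference, here is one of the cases this patch changes, reduced to a standalone IR function. The function and its checked output are lifted from the updated fixed-vectors-llrint.ll below; the llc invocation in the comment is only an illustration, the exact RUN lines are in the test file:

  declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)

  ; e.g. llc -mtriple=riscv64 -mattr=+v < test.ll
  define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
    %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
    ret <4 x i64> %a
  }

Before the patch this is scalarized: llrintf libcalls on RV32 and per-element fcvt.l.s on RV64. With the patch, both RV32 and RV64 select a single widening convert that rounds using the dynamic rounding mode from frm:

    vsetivli zero, 4, e32, m1, ta, ma
    vmv1r.v v10, v8
    vfwcvt.x.f.v v8, v10
    ret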
From c286835db60eef1ff545a6f6461c3ef050e6289a Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 26 Jun 2025 14:25:48 +0100
Subject: [PATCH] [ISel/RISCV] Fix fixed-vector [l]lrint lowering
Make the fixed-vector lowering of ISD::[L]LRINT use the custom-lowering
routine, lowerVectorXRINT, and fix lowerVectorXRINT for this new use:
derive the source and destination container types separately (the source
and result types differ), and take the rounding mode from matchRoundingOp
rather than hard-coding DYN.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 46 +-
.../CodeGen/RISCV/rvv/fixed-vectors-llrint.ll | 931 +-----------------
.../CodeGen/RISCV/rvv/fixed-vectors-lrint.ll | 781 +--------------
3 files changed, 127 insertions(+), 1631 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 13ee3ee63d1a6..71d62c7342328 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1499,7 +1499,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
VT, Custom);
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
- ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
+ ISD::FROUNDEVEN, ISD::FRINT, ISD::LRINT,
+ ISD::LLRINT, ISD::FNEARBYINT},
VT, Custom);
setCondCodeAction(VFPCCToExpand, VT, Expand);
@@ -3202,7 +3203,14 @@ static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
case ISD::VP_FROUND:
return RISCVFPRndMode::RMM;
case ISD::FRINT:
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
case ISD::VP_FRINT:
+ case ISD::VP_LRINT:
+ case ISD::VP_LLRINT:
return RISCVFPRndMode::DYN;
}
@@ -3452,28 +3460,34 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
// Expand vector LRINT and LLRINT by converting to the integer domain.
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- MVT VT = Op.getSimpleValueType();
- assert(VT.isVector() && "Unexpected type");
-
SDLoc DL(Op);
+ MVT DstVT = Op.getSimpleValueType();
SDValue Src = Op.getOperand(0);
- MVT ContainerVT = VT;
+ MVT SrcVT = Src.getSimpleValueType();
+ assert(SrcVT.isVector() && DstVT.isVector() &&
+ !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
+ "Unexpected type");
- if (VT.isFixedLengthVector()) {
- ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
- Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+ MVT DstContainerVT = DstVT;
+ MVT SrcContainerVT = SrcVT;
+
+ if (DstVT.isFixedLengthVector()) {
+ DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
+ SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
+ Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
}
- auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
- SDValue Truncated = DAG.getNode(
- RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
- DAG.getTargetConstant(RISCVFPRndMode::DYN, DL, Subtarget.getXLenVT()),
- VL);
+ auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
+ SDValue Res =
+ DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
+ DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
+ Subtarget.getXLenVT()),
+ VL);
- if (!VT.isFixedLengthVector())
- return Truncated;
+ if (!DstVT.isFixedLengthVector())
+ return Res;
- return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
+ return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
}
static SDValue
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
index c1252902ede69..0a6f9f5ba0928 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
@@ -7,31 +7,16 @@
define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
; RV32-LABEL: llrint_v1i64_v1f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vfwcvt.x.f.v v9, v8
+; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v1i64_v1f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
+; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vfwcvt.x.f.v v9, v8
+; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
ret <1 x i64> %a
@@ -41,60 +26,16 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
; RV32-LABEL: llrint_v2i64_v2f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: add a2, sp, a2
-; RV32-NEXT: addi a2, a2, 16
-; RV32-NEXT: vl1r.v v8, (a2) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: .cfi_def_cfa sp, 32
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vfwcvt.x.f.v v9, v8
+; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v2i64_v2f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vfmv.f.s fa5, v9
-; RV64-NEXT: fcvt.l.s a1, fa5
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT: vfwcvt.x.f.v v9, v8
+; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
ret <2 x i64> %a
@@ -104,106 +45,16 @@ declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) {
; RV32-LABEL: llrint_v3i64_v3f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a1, a0, 1
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 3 * vlenb
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 2
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a1, a0, 1
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: .cfi_def_cfa sp, 32
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vfwcvt.x.f.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v3i64_v3f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vslidedown.vi v11, v8, 3
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vfmv.f.s fa5, v9
-; RV64-NEXT: fcvt.l.s a1, fa5
-; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v11
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vmv1r.v v10, v8
+; RV64-NEXT: vfwcvt.x.f.v v8, v10
; RV64-NEXT: ret
%a = call <3 x i64> @llvm.llrint.v3i64.v3f32(<3 x float> %x)
ret <3 x i64> %a
@@ -213,106 +64,16 @@ declare <3 x i64> @llvm.llrint.v3i64.v3f32(<3 x float>)
define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
; RV32-LABEL: llrint_v4i64_v4f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a1, a0, 1
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 3 * vlenb
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 2
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a1, a0, 1
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: .cfi_def_cfa sp, 32
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vfwcvt.x.f.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v4i64_v4f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vslidedown.vi v11, v8, 3
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vfmv.f.s fa5, v9
-; RV64-NEXT: fcvt.l.s a1, fa5
-; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v11
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vmv1r.v v10, v8
+; RV64-NEXT: vfwcvt.x.f.v v8, v10
; RV64-NEXT: ret
%a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
ret <4 x i64> %a
@@ -322,149 +83,16 @@ declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
; RV32-LABEL: llrint_v8i64_v8f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -208
-; RV32-NEXT: .cfi_def_cfa_offset 208
-; RV32-NEXT: sw ra, 204(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 200(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: .cfi_offset s0, -8
-; RV32-NEXT: addi s0, sp, 208
-; RV32-NEXT: .cfi_def_cfa s0, 0
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 64(sp)
-; RV32-NEXT: sw a1, 68(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 120(sp)
-; RV32-NEXT: sw a1, 124(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 6
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 112(sp)
-; RV32-NEXT: sw a1, 116(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 5
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 104(sp)
-; RV32-NEXT: sw a1, 108(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 4
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 96(sp)
-; RV32-NEXT: sw a1, 100(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 88(sp)
-; RV32-NEXT: sw a1, 92(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 2
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 80(sp)
-; RV32-NEXT: sw a1, 84(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 72(sp)
-; RV32-NEXT: sw a1, 76(sp)
-; RV32-NEXT: addi a0, sp, 64
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: addi sp, s0, -208
-; RV32-NEXT: .cfi_def_cfa sp, 208
-; RV32-NEXT: lw ra, 204(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 200(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: .cfi_restore s0
-; RV32-NEXT: addi sp, sp, 208
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vfwcvt.x.f.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v8i64_v8f32:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -128
-; RV64-NEXT: .cfi_def_cfa_offset 128
-; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: .cfi_offset s0, -16
-; RV64-NEXT: addi s0, sp, 128
-; RV64-NEXT: .cfi_def_cfa s0, 0
-; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: vslidedown.vi v10, v8, 7
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: vslidedown.vi v10, v8, 6
-; RV64-NEXT: fcvt.l.s a1, fa5
-; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: vslidedown.vi v10, v8, 5
-; RV64-NEXT: fcvt.l.s a2, fa5
-; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: vslidedown.vi v10, v8, 4
-; RV64-NEXT: fcvt.l.s a3, fa5
-; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 3
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: fcvt.l.s a4, fa5
-; RV64-NEXT: vfmv.f.s fa5, v9
-; RV64-NEXT: fcvt.l.s a5, fa5
-; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.l.s a6, fa5
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: sd a4, 32(sp)
-; RV64-NEXT: sd a3, 40(sp)
-; RV64-NEXT: sd a2, 48(sp)
-; RV64-NEXT: sd a1, 56(sp)
-; RV64-NEXT: fcvt.l.s a1, fa5
-; RV64-NEXT: sd a0, 0(sp)
-; RV64-NEXT: sd a1, 8(sp)
-; RV64-NEXT: sd a6, 16(sp)
-; RV64-NEXT: sd a5, 24(sp)
-; RV64-NEXT: mv a0, sp
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: addi sp, s0, -128
-; RV64-NEXT: .cfi_def_cfa sp, 128
-; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
-; RV64-NEXT: .cfi_restore ra
-; RV64-NEXT: .cfi_restore s0
-; RV64-NEXT: addi sp, sp, 128
-; RV64-NEXT: .cfi_def_cfa_offset 0
+; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vmv2r.v v12, v8
+; RV64-NEXT: vfwcvt.x.f.v v8, v12
; RV64-NEXT: ret
%a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
ret <8 x i64> %a
@@ -474,214 +102,16 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
; RV32-LABEL: llrint_v16i64_v16f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -400
-; RV32-NEXT: .cfi_def_cfa_offset 400
-; RV32-NEXT: sw ra, 396(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 392(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: .cfi_offset s0, -8
-; RV32-NEXT: addi s0, sp, 400
-; RV32-NEXT: .cfi_def_cfa s0, 0
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 2
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: andi sp, sp, -128
-; RV32-NEXT: addi a0, sp, 384
-; RV32-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT: addi a0, sp, 64
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: flw fa0, 124(sp)
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 248(sp)
-; RV32-NEXT: sw a1, 252(sp)
-; RV32-NEXT: flw fa0, 120(sp)
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 240(sp)
-; RV32-NEXT: sw a1, 244(sp)
-; RV32-NEXT: flw fa0, 116(sp)
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 232(sp)
-; RV32-NEXT: sw a1, 236(sp)
-; RV32-NEXT: flw fa0, 112(sp)
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 224(sp)
-; RV32-NEXT: sw a1, 228(sp)
-; RV32-NEXT: flw fa0, 108(sp)
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 216(sp)
-; RV32-NEXT: sw a1, 220(sp)
-; RV32-NEXT: flw fa0, 104(sp)
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 208(sp)
-; RV32-NEXT: sw a1, 212(sp)
-; RV32-NEXT: flw fa0, 100(sp)
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 200(sp)
-; RV32-NEXT: sw a1, 204(sp)
-; RV32-NEXT: flw fa0, 96(sp)
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 192(sp)
-; RV32-NEXT: sw a1, 196(sp)
-; RV32-NEXT: addi a0, sp, 384
-; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 128(sp)
-; RV32-NEXT: sw a1, 132(sp)
-; RV32-NEXT: addi a0, sp, 384
-; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 152(sp)
-; RV32-NEXT: sw a1, 156(sp)
-; RV32-NEXT: addi a0, sp, 384
-; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 2
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 144(sp)
-; RV32-NEXT: sw a1, 148(sp)
-; RV32-NEXT: addi a0, sp, 384
-; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 136(sp)
-; RV32-NEXT: sw a1, 140(sp)
-; RV32-NEXT: addi a0, sp, 384
-; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 184(sp)
-; RV32-NEXT: sw a1, 188(sp)
-; RV32-NEXT: addi a0, sp, 384
-; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 6
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 176(sp)
-; RV32-NEXT: sw a1, 180(sp)
-; RV32-NEXT: addi a0, sp, 384
-; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 5
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 168(sp)
-; RV32-NEXT: sw a1, 172(sp)
-; RV32-NEXT: addi a0, sp, 384
-; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 4
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 160(sp)
-; RV32-NEXT: sw a1, 164(sp)
-; RV32-NEXT: li a0, 32
-; RV32-NEXT: addi a1, sp, 128
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vle32.v v8, (a1)
-; RV32-NEXT: addi sp, s0, -400
-; RV32-NEXT: .cfi_def_cfa sp, 400
-; RV32-NEXT: lw ra, 396(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 392(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: .cfi_restore s0
-; RV32-NEXT: addi sp, sp, 400
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vmv4r.v v16, v8
+; RV32-NEXT: vfwcvt.x.f.v v8, v16
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v16i64_v16f32:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -384
-; RV64-NEXT: .cfi_def_cfa_offset 384
-; RV64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: .cfi_offset s0, -16
-; RV64-NEXT: addi s0, sp, 384
-; RV64-NEXT: .cfi_def_cfa s0, 0
-; RV64-NEXT: andi sp, sp, -128
-; RV64-NEXT: addi a0, sp, 64
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: flw fa5, 124(sp)
-; RV64-NEXT: vfmv.f.s fa4, v8
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 3
-; RV64-NEXT: vslidedown.vi v12, v8, 2
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 248(sp)
-; RV64-NEXT: flw fa5, 120(sp)
-; RV64-NEXT: vslidedown.vi v13, v8, 1
-; RV64-NEXT: fcvt.l.s a0, fa4
-; RV64-NEXT: vfmv.f.s fa4, v10
-; RV64-NEXT: fcvt.l.s a1, fa5
-; RV64-NEXT: sd a1, 240(sp)
-; RV64-NEXT: flw fa5, 116(sp)
-; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 7
-; RV64-NEXT: fcvt.l.s a1, fa4
-; RV64-NEXT: vfmv.f.s fa4, v12
-; RV64-NEXT: fcvt.l.s a2, fa5
-; RV64-NEXT: sd a2, 232(sp)
-; RV64-NEXT: flw fa5, 112(sp)
-; RV64-NEXT: fcvt.l.s a2, fa4
-; RV64-NEXT: vfmv.f.s fa4, v13
-; RV64-NEXT: vslidedown.vi v12, v8, 6
-; RV64-NEXT: fcvt.l.s a3, fa5
-; RV64-NEXT: sd a3, 224(sp)
-; RV64-NEXT: flw fa5, 108(sp)
-; RV64-NEXT: fcvt.l.s a3, fa4
-; RV64-NEXT: vfmv.f.s fa4, v10
-; RV64-NEXT: vslidedown.vi v10, v8, 5
-; RV64-NEXT: fcvt.l.s a4, fa5
-; RV64-NEXT: sd a4, 216(sp)
-; RV64-NEXT: flw fa5, 104(sp)
-; RV64-NEXT: fcvt.l.s a4, fa4
-; RV64-NEXT: vfmv.f.s fa4, v12
-; RV64-NEXT: fcvt.l.s a5, fa4
-; RV64-NEXT: fcvt.l.s a6, fa5
-; RV64-NEXT: sd a6, 208(sp)
-; RV64-NEXT: flw fa5, 100(sp)
-; RV64-NEXT: vfmv.f.s fa4, v10
-; RV64-NEXT: fcvt.l.s a6, fa4
-; RV64-NEXT: vslidedown.vi v8, v8, 4
-; RV64-NEXT: fcvt.l.s a7, fa5
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: sd a7, 200(sp)
-; RV64-NEXT: fcvt.l.s a7, fa5
-; RV64-NEXT: flw fa5, 96(sp)
-; RV64-NEXT: sd a0, 128(sp)
-; RV64-NEXT: sd a3, 136(sp)
-; RV64-NEXT: sd a2, 144(sp)
-; RV64-NEXT: sd a1, 152(sp)
-; RV64-NEXT: sd a7, 160(sp)
-; RV64-NEXT: sd a6, 168(sp)
-; RV64-NEXT: sd a5, 176(sp)
-; RV64-NEXT: sd a4, 184(sp)
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 192(sp)
-; RV64-NEXT: addi a0, sp, 128
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: addi sp, s0, -384
-; RV64-NEXT: .cfi_def_cfa sp, 384
-; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
-; RV64-NEXT: .cfi_restore ra
-; RV64-NEXT: .cfi_restore s0
-; RV64-NEXT: addi sp, sp, 384
-; RV64-NEXT: .cfi_def_cfa_offset 0
+; RV64-NEXT: vmv4r.v v16, v8
+; RV64-NEXT: vfwcvt.x.f.v v8, v16
; RV64-NEXT: ret
%a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
ret <16 x i64> %a
@@ -691,30 +121,14 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
; RV32-LABEL: llrint_v1i64_v1f64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrint
-; RV32-NEXT: sw a0, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vfcvt.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v1i64_v1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: vmv.s.x v8, a0
+; RV64-NEXT: vfcvt.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
ret <1 x i64> %a
@@ -724,59 +138,14 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
; RV32-LABEL: llrint_v2i64_v2f64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrint
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrint
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: add a2, sp, a2
-; RV32-NEXT: addi a2, a2, 16
-; RV32-NEXT: vl1r.v v8, (a2) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: .cfi_def_cfa sp, 32
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v2i64_v2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: vfmv.f.s fa5, v9
-; RV64-NEXT: fcvt.l.d a1, fa5
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: vfcvt.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x)
ret <2 x i64> %a
@@ -786,103 +155,14 @@ declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
; RV32-LABEL: llrint_v4i64_v4f64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 2
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrint
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrint
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 2
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrint
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrint
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 2
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: .cfi_def_cfa sp, 32
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v4i64_v4f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 1
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vslidedown.vi v8, v8, 3
-; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: vfmv.f.s fa5, v12
-; RV64-NEXT: fcvt.l.d a1, fa5
-; RV64-NEXT: vfmv.f.s fa5, v10
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v10, a0
-; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: vslide1down.vx v8, v10, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vfcvt.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x)
ret <4 x i64> %a
@@ -892,137 +172,14 @@ declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
; RV32-LABEL: llrint_v8i64_v8f64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -272
-; RV32-NEXT: .cfi_def_cfa_offset 272
-; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: .cfi_offset s0, -8
-; RV32-NEXT: addi s0, sp, 272
-; RV32-NEXT: .cfi_def_cfa s0, 0
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 2
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: addi a0, sp, 256
-; RV32-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
-; RV32-NEXT: addi a0, sp, 64
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vse64.v v8, (a0)
-; RV32-NEXT: fld fa0, 120(sp)
-; RV32-NEXT: call llrint
-; RV32-NEXT: sw a0, 184(sp)
-; RV32-NEXT: sw a1, 188(sp)
-; RV32-NEXT: fld fa0, 112(sp)
-; RV32-NEXT: call llrint
-; RV32-NEXT: sw a0, 176(sp)
-; RV32-NEXT: sw a1, 180(sp)
-; RV32-NEXT: fld fa0, 104(sp)
-; RV32-NEXT: call llrint
-; RV32-NEXT: sw a0, 168(sp)
-; RV32-NEXT: sw a1, 172(sp)
-; RV32-NEXT: fld fa0, 96(sp)
-; RV32-NEXT: call llrint
-; RV32-NEXT: sw a0, 160(sp)
-; RV32-NEXT: sw a1, 164(sp)
-; RV32-NEXT: addi a0, sp, 256
-; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrint
-; RV32-NEXT: sw a0, 128(sp)
-; RV32-NEXT: sw a1, 132(sp)
-; RV32-NEXT: addi a0, sp, 256
-; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrint
-; RV32-NEXT: sw a0, 136(sp)
-; RV32-NEXT: sw a1, 140(sp)
-; RV32-NEXT: addi a0, sp, 256
-; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrint
-; RV32-NEXT: sw a0, 152(sp)
-; RV32-NEXT: sw a1, 156(sp)
-; RV32-NEXT: addi a0, sp, 256
-; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 2
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrint
-; RV32-NEXT: sw a0, 144(sp)
-; RV32-NEXT: sw a1, 148(sp)
-; RV32-NEXT: addi a0, sp, 128
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: addi sp, s0, -272
-; RV32-NEXT: .cfi_def_cfa sp, 272
-; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: .cfi_restore s0
-; RV32-NEXT: addi sp, sp, 272
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vfcvt.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v8i64_v8f64:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -192
-; RV64-NEXT: .cfi_def_cfa_offset 192
-; RV64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: .cfi_offset s0, -16
-; RV64-NEXT: addi s0, sp, 192
-; RV64-NEXT: .cfi_def_cfa s0, 0
-; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: mv a0, sp
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: fld fa5, 56(sp)
-; RV64-NEXT: vfmv.f.s fa4, v8
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 1
-; RV64-NEXT: fcvt.l.d a0, fa4
-; RV64-NEXT: fcvt.l.d a1, fa5
-; RV64-NEXT: sd a1, 120(sp)
-; RV64-NEXT: fld fa5, 48(sp)
-; RV64-NEXT: vfmv.f.s fa4, v10
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 3
-; RV64-NEXT: fcvt.l.d a1, fa4
-; RV64-NEXT: fcvt.l.d a2, fa5
-; RV64-NEXT: sd a2, 112(sp)
-; RV64-NEXT: fld fa5, 40(sp)
-; RV64-NEXT: vfmv.f.s fa4, v10
-; RV64-NEXT: fcvt.l.d a2, fa4
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: fcvt.l.d a3, fa5
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: sd a3, 104(sp)
-; RV64-NEXT: fcvt.l.d a3, fa5
-; RV64-NEXT: fld fa5, 32(sp)
-; RV64-NEXT: sd a0, 64(sp)
-; RV64-NEXT: sd a1, 72(sp)
-; RV64-NEXT: sd a3, 80(sp)
-; RV64-NEXT: sd a2, 88(sp)
-; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: sd a0, 96(sp)
-; RV64-NEXT: addi a0, sp, 64
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: addi sp, s0, -192
-; RV64-NEXT: .cfi_def_cfa sp, 192
-; RV64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
-; RV64-NEXT: .cfi_restore ra
-; RV64-NEXT: .cfi_restore s0
-; RV64-NEXT: addi sp, sp, 192
-; RV64-NEXT: .cfi_def_cfa_offset 0
+; RV64-NEXT: vfcvt.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
ret <8 x i64> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
index 27abd624f7765..ef2208666e0b4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
@@ -9,27 +9,21 @@
define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
; RV32-LABEL: lrint_v1f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-i32-LABEL: lrint_v1f32:
; RV64-i32: # %bb.0:
-; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vmv.s.x v8, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8
; RV64-i32-NEXT: ret
;
; RV64-i64-LABEL: lrint_v1f32:
; RV64-i64: # %bb.0:
-; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-i64-NEXT: vmv.s.x v8, a0
+; RV64-i64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v9, v8
+; RV64-i64-NEXT: vmv1r.v v8, v9
; RV64-i64-NEXT: ret
%a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x)
ret <1 x iXLen> %a
@@ -40,38 +34,20 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
; RV32-LABEL: lrint_v2f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v9
-; RV32-NEXT: fcvt.w.s a1, fa5
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vfcvt.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-i32-LABEL: lrint_v2f32:
; RV64-i32: # %bb.0:
; RV64-i32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v9, v8, 1
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v9
-; RV64-i32-NEXT: fcvt.l.s a1, fa5
-; RV64-i32-NEXT: vmv.v.x v8, a0
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a1
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8
; RV64-i32-NEXT: ret
;
; RV64-i64-LABEL: lrint_v2f32:
; RV64-i64: # %bb.0:
-; RV64-i64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v9
-; RV64-i64-NEXT: fcvt.l.s a1, fa5
-; RV64-i64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-i64-NEXT: vmv.v.x v8, a0
-; RV64-i64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-i64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v9, v8
+; RV64-i64-NEXT: vmv1r.v v8, v9
; RV64-i64-NEXT: ret
%a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
ret <2 x iXLen> %a
@@ -82,64 +58,20 @@ define <3 x iXLen> @lrint_v3f32(<3 x float> %x) {
; RV32-LABEL: lrint_v3f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v9
-; RV32-NEXT: fcvt.w.s a1, fa5
-; RV32-NEXT: vfmv.f.s fa5, v10
-; RV32-NEXT: vmv.v.x v9, a0
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: vslide1down.vx v8, v9, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vfcvt.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-i32-LABEL: lrint_v3f32:
; RV64-i32: # %bb.0:
; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v9, v8, 1
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: vslidedown.vi v10, v8, 2
-; RV64-i32-NEXT: vslidedown.vi v8, v8, 3
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v9
-; RV64-i32-NEXT: fcvt.l.s a1, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v10
-; RV64-i32-NEXT: vmv.v.x v9, a0
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: vslide1down.vx v8, v9, a1
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8
; RV64-i32-NEXT: ret
;
; RV64-i64-LABEL: lrint_v3f32:
; RV64-i64: # %bb.0:
-; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-i64-NEXT: vslidedown.vi v11, v8, 3
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v9
-; RV64-i64-NEXT: fcvt.l.s a1, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-i64-NEXT: vmv.v.x v8, a0
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64-i64-NEXT: vfmv.f.s fa5, v11
-; RV64-i64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-i64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-i64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-i64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-i64-NEXT: vmv1r.v v10, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v10
; RV64-i64-NEXT: ret
%a = call <3 x iXLen> @llvm.lrint.v3iXLen.v3f32(<3 x float> %x)
ret <3 x iXLen> %a
@@ -150,64 +82,20 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
; RV32-LABEL: lrint_v4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v9
-; RV32-NEXT: fcvt.w.s a1, fa5
-; RV32-NEXT: vfmv.f.s fa5, v10
-; RV32-NEXT: vmv.v.x v9, a0
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: vslide1down.vx v8, v9, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vfcvt.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-i32-LABEL: lrint_v4f32:
; RV64-i32: # %bb.0:
; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v9, v8, 1
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: vslidedown.vi v10, v8, 2
-; RV64-i32-NEXT: vslidedown.vi v8, v8, 3
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v9
-; RV64-i32-NEXT: fcvt.l.s a1, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v10
-; RV64-i32-NEXT: vmv.v.x v9, a0
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: vslide1down.vx v8, v9, a1
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8
; RV64-i32-NEXT: ret
;
; RV64-i64-LABEL: lrint_v4f32:
; RV64-i64: # %bb.0:
-; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-i64-NEXT: vslidedown.vi v11, v8, 3
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v9
-; RV64-i64-NEXT: fcvt.l.s a1, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-i64-NEXT: vmv.v.x v8, a0
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64-i64-NEXT: vfmv.f.s fa5, v11
-; RV64-i64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-i64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-i64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-i64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-i64-NEXT: vmv1r.v v10, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v10
; RV64-i64-NEXT: ret
%a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x)
ret <4 x iXLen> %a
@@ -217,133 +105,21 @@ declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)
define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
; RV32-LABEL: lrint_v8f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: vslidedown.vi v11, v8, 2
-; RV32-NEXT: vslidedown.vi v12, v8, 3
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v10
-; RV32-NEXT: fcvt.w.s a1, fa5
-; RV32-NEXT: vfmv.f.s fa5, v11
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a0
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v12
-; RV32-NEXT: vslidedown.vi v12, v8, 4
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: fcvt.w.s a1, fa5
-; RV32-NEXT: vfmv.f.s fa5, v12
-; RV32-NEXT: vslidedown.vi v12, v8, 5
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v12
-; RV32-NEXT: vslidedown.vi v12, v8, 6
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: fcvt.w.s a1, fa5
-; RV32-NEXT: vfmv.f.s fa5, v12
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: vslide1down.vx v8, v10, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vfcvt.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-i32-LABEL: lrint_v8f32:
; RV64-i32: # %bb.0:
-; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v10, v8, 1
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: vslidedown.vi v11, v8, 2
-; RV64-i32-NEXT: vslidedown.vi v12, v8, 3
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v10
-; RV64-i32-NEXT: fcvt.l.s a1, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v11
; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-i32-NEXT: vmv.v.x v10, a0
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v12
-; RV64-i32-NEXT: vslidedown.vi v12, v8, 4
-; RV64-i32-NEXT: vslide1down.vx v10, v10, a1
-; RV64-i32-NEXT: fcvt.l.s a1, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v12
-; RV64-i32-NEXT: vslidedown.vi v12, v8, 5
-; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v12
-; RV64-i32-NEXT: vslidedown.vi v12, v8, 6
-; RV64-i32-NEXT: vslidedown.vi v8, v8, 7
-; RV64-i32-NEXT: vslide1down.vx v10, v10, a1
-; RV64-i32-NEXT: fcvt.l.s a1, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v12
-; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: vslide1down.vx v8, v10, a1
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8
; RV64-i32-NEXT: ret
;
; RV64-i64-LABEL: lrint_v8f32:
; RV64-i64: # %bb.0:
-; RV64-i64-NEXT: addi sp, sp, -128
-; RV64-i64-NEXT: .cfi_def_cfa_offset 128
-; RV64-i64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
-; RV64-i64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
-; RV64-i64-NEXT: .cfi_offset ra, -8
-; RV64-i64-NEXT: .cfi_offset s0, -16
-; RV64-i64-NEXT: addi s0, sp, 128
-; RV64-i64-NEXT: .cfi_def_cfa s0, 0
-; RV64-i64-NEXT: andi sp, sp, -64
-; RV64-i64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 7
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 6
-; RV64-i64-NEXT: fcvt.l.s a1, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 5
-; RV64-i64-NEXT: fcvt.l.s a2, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 4
-; RV64-i64-NEXT: fcvt.l.s a3, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v9, v8, 3
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-i64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-i64-NEXT: fcvt.l.s a4, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v9
-; RV64-i64-NEXT: fcvt.l.s a5, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: fcvt.l.s a6, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: sd a4, 32(sp)
-; RV64-i64-NEXT: sd a3, 40(sp)
-; RV64-i64-NEXT: sd a2, 48(sp)
-; RV64-i64-NEXT: sd a1, 56(sp)
-; RV64-i64-NEXT: fcvt.l.s a1, fa5
-; RV64-i64-NEXT: sd a0, 0(sp)
-; RV64-i64-NEXT: sd a1, 8(sp)
-; RV64-i64-NEXT: sd a6, 16(sp)
-; RV64-i64-NEXT: sd a5, 24(sp)
-; RV64-i64-NEXT: mv a0, sp
-; RV64-i64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-i64-NEXT: vle64.v v8, (a0)
-; RV64-i64-NEXT: addi sp, s0, -128
-; RV64-i64-NEXT: .cfi_def_cfa sp, 128
-; RV64-i64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
-; RV64-i64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
-; RV64-i64-NEXT: .cfi_restore ra
-; RV64-i64-NEXT: .cfi_restore s0
-; RV64-i64-NEXT: addi sp, sp, 128
-; RV64-i64-NEXT: .cfi_def_cfa_offset 0
+; RV64-i64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i64-NEXT: vmv2r.v v12, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v12
; RV64-i64-NEXT: ret
%a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x)
ret <8 x iXLen> %a
@@ -353,254 +129,21 @@ declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)
define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
; RV32-LABEL: lrint_v16f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -192
-; RV32-NEXT: .cfi_def_cfa_offset 192
-; RV32-NEXT: sw ra, 188(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 184(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: .cfi_offset s0, -8
-; RV32-NEXT: addi s0, sp, 192
-; RV32-NEXT: .cfi_def_cfa s0, 0
-; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: mv a0, sp
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: flw fa5, 60(sp)
-; RV32-NEXT: vfmv.f.s fa4, v8
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 3
-; RV32-NEXT: vslidedown.vi v11, v8, 2
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: sw a0, 124(sp)
-; RV32-NEXT: flw fa5, 56(sp)
-; RV32-NEXT: fcvt.w.s a0, fa4
-; RV32-NEXT: vfmv.f.s fa4, v10
-; RV32-NEXT: vslidedown.vi v10, v8, 1
-; RV32-NEXT: fcvt.w.s a1, fa5
-; RV32-NEXT: sw a1, 120(sp)
-; RV32-NEXT: flw fa5, 52(sp)
-; RV32-NEXT: fcvt.w.s a1, fa4
-; RV32-NEXT: vfmv.f.s fa4, v11
-; RV32-NEXT: fcvt.w.s a2, fa4
-; RV32-NEXT: fcvt.w.s a3, fa5
-; RV32-NEXT: sw a3, 116(sp)
-; RV32-NEXT: flw fa5, 48(sp)
-; RV32-NEXT: vfmv.f.s fa4, v10
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 7
-; RV32-NEXT: fcvt.w.s a3, fa4
-; RV32-NEXT: fcvt.w.s a4, fa5
-; RV32-NEXT: sw a4, 112(sp)
-; RV32-NEXT: flw fa5, 44(sp)
-; RV32-NEXT: vfmv.f.s fa4, v10
-; RV32-NEXT: vslidedown.vi v10, v8, 6
-; RV32-NEXT: fcvt.w.s a4, fa4
-; RV32-NEXT: fcvt.w.s a5, fa5
-; RV32-NEXT: sw a5, 108(sp)
-; RV32-NEXT: flw fa5, 40(sp)
-; RV32-NEXT: vfmv.f.s fa4, v10
-; RV32-NEXT: vslidedown.vi v10, v8, 5
-; RV32-NEXT: fcvt.w.s a5, fa4
-; RV32-NEXT: fcvt.w.s a6, fa5
-; RV32-NEXT: sw a6, 104(sp)
-; RV32-NEXT: flw fa5, 36(sp)
-; RV32-NEXT: vfmv.f.s fa4, v10
-; RV32-NEXT: fcvt.w.s a6, fa4
-; RV32-NEXT: vslidedown.vi v8, v8, 4
-; RV32-NEXT: fcvt.w.s a7, fa5
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: sw a7, 100(sp)
-; RV32-NEXT: fcvt.w.s a7, fa5
-; RV32-NEXT: flw fa5, 32(sp)
-; RV32-NEXT: sw a0, 64(sp)
-; RV32-NEXT: sw a3, 68(sp)
-; RV32-NEXT: sw a2, 72(sp)
-; RV32-NEXT: sw a1, 76(sp)
-; RV32-NEXT: sw a7, 80(sp)
-; RV32-NEXT: sw a6, 84(sp)
-; RV32-NEXT: sw a5, 88(sp)
-; RV32-NEXT: sw a4, 92(sp)
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: sw a0, 96(sp)
-; RV32-NEXT: addi a0, sp, 64
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: addi sp, s0, -192
-; RV32-NEXT: .cfi_def_cfa sp, 192
-; RV32-NEXT: lw ra, 188(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 184(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: .cfi_restore s0
-; RV32-NEXT: addi sp, sp, 192
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vfcvt.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-i32-LABEL: lrint_v16f32:
; RV64-i32: # %bb.0:
-; RV64-i32-NEXT: addi sp, sp, -192
-; RV64-i32-NEXT: .cfi_def_cfa_offset 192
-; RV64-i32-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
-; RV64-i32-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
-; RV64-i32-NEXT: .cfi_offset ra, -8
-; RV64-i32-NEXT: .cfi_offset s0, -16
-; RV64-i32-NEXT: addi s0, sp, 192
-; RV64-i32-NEXT: .cfi_def_cfa s0, 0
-; RV64-i32-NEXT: andi sp, sp, -64
-; RV64-i32-NEXT: mv a0, sp
-; RV64-i32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-i32-NEXT: vse32.v v8, (a0)
-; RV64-i32-NEXT: flw fa5, 60(sp)
-; RV64-i32-NEXT: vfmv.f.s fa4, v8
-; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v10, v8, 3
-; RV64-i32-NEXT: vslidedown.vi v11, v8, 2
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: sw a0, 124(sp)
-; RV64-i32-NEXT: flw fa5, 56(sp)
-; RV64-i32-NEXT: fcvt.l.s a0, fa4
-; RV64-i32-NEXT: vfmv.f.s fa4, v10
-; RV64-i32-NEXT: vslidedown.vi v10, v8, 1
-; RV64-i32-NEXT: fcvt.l.s a1, fa5
-; RV64-i32-NEXT: sw a1, 120(sp)
-; RV64-i32-NEXT: flw fa5, 52(sp)
-; RV64-i32-NEXT: fcvt.l.s a1, fa4
-; RV64-i32-NEXT: vfmv.f.s fa4, v11
-; RV64-i32-NEXT: fcvt.l.s a2, fa4
-; RV64-i32-NEXT: fcvt.l.s a3, fa5
-; RV64-i32-NEXT: sw a3, 116(sp)
-; RV64-i32-NEXT: flw fa5, 48(sp)
-; RV64-i32-NEXT: vfmv.f.s fa4, v10
-; RV64-i32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v10, v8, 7
-; RV64-i32-NEXT: fcvt.l.s a3, fa4
-; RV64-i32-NEXT: fcvt.l.s a4, fa5
-; RV64-i32-NEXT: sw a4, 112(sp)
-; RV64-i32-NEXT: flw fa5, 44(sp)
-; RV64-i32-NEXT: vfmv.f.s fa4, v10
-; RV64-i32-NEXT: vslidedown.vi v10, v8, 6
-; RV64-i32-NEXT: fcvt.l.s a4, fa4
-; RV64-i32-NEXT: fcvt.l.s a5, fa5
-; RV64-i32-NEXT: sw a5, 108(sp)
-; RV64-i32-NEXT: flw fa5, 40(sp)
-; RV64-i32-NEXT: vfmv.f.s fa4, v10
-; RV64-i32-NEXT: vslidedown.vi v10, v8, 5
-; RV64-i32-NEXT: fcvt.l.s a5, fa4
-; RV64-i32-NEXT: fcvt.l.s a6, fa5
-; RV64-i32-NEXT: sw a6, 104(sp)
-; RV64-i32-NEXT: flw fa5, 36(sp)
-; RV64-i32-NEXT: vfmv.f.s fa4, v10
-; RV64-i32-NEXT: fcvt.l.s a6, fa4
-; RV64-i32-NEXT: vslidedown.vi v8, v8, 4
-; RV64-i32-NEXT: fcvt.l.s a7, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: sw a7, 100(sp)
-; RV64-i32-NEXT: fcvt.l.s a7, fa5
-; RV64-i32-NEXT: flw fa5, 32(sp)
-; RV64-i32-NEXT: sw a0, 64(sp)
-; RV64-i32-NEXT: sw a3, 68(sp)
-; RV64-i32-NEXT: sw a2, 72(sp)
-; RV64-i32-NEXT: sw a1, 76(sp)
-; RV64-i32-NEXT: sw a7, 80(sp)
-; RV64-i32-NEXT: sw a6, 84(sp)
-; RV64-i32-NEXT: sw a5, 88(sp)
-; RV64-i32-NEXT: sw a4, 92(sp)
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: sw a0, 96(sp)
-; RV64-i32-NEXT: addi a0, sp, 64
; RV64-i32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-i32-NEXT: vle32.v v8, (a0)
-; RV64-i32-NEXT: addi sp, s0, -192
-; RV64-i32-NEXT: .cfi_def_cfa sp, 192
-; RV64-i32-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
-; RV64-i32-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
-; RV64-i32-NEXT: .cfi_restore ra
-; RV64-i32-NEXT: .cfi_restore s0
-; RV64-i32-NEXT: addi sp, sp, 192
-; RV64-i32-NEXT: .cfi_def_cfa_offset 0
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8
; RV64-i32-NEXT: ret
;
; RV64-i64-LABEL: lrint_v16f32:
; RV64-i64: # %bb.0:
-; RV64-i64-NEXT: addi sp, sp, -384
-; RV64-i64-NEXT: .cfi_def_cfa_offset 384
-; RV64-i64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill
-; RV64-i64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
-; RV64-i64-NEXT: .cfi_offset ra, -8
-; RV64-i64-NEXT: .cfi_offset s0, -16
-; RV64-i64-NEXT: addi s0, sp, 384
-; RV64-i64-NEXT: .cfi_def_cfa s0, 0
-; RV64-i64-NEXT: andi sp, sp, -128
-; RV64-i64-NEXT: addi a0, sp, 64
; RV64-i64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-i64-NEXT: vse32.v v8, (a0)
-; RV64-i64-NEXT: flw fa5, 124(sp)
-; RV64-i64-NEXT: vfmv.f.s fa4, v8
-; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 3
-; RV64-i64-NEXT: vslidedown.vi v12, v8, 2
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 248(sp)
-; RV64-i64-NEXT: flw fa5, 120(sp)
-; RV64-i64-NEXT: vslidedown.vi v13, v8, 1
-; RV64-i64-NEXT: fcvt.l.s a0, fa4
-; RV64-i64-NEXT: vfmv.f.s fa4, v10
-; RV64-i64-NEXT: fcvt.l.s a1, fa5
-; RV64-i64-NEXT: sd a1, 240(sp)
-; RV64-i64-NEXT: flw fa5, 116(sp)
-; RV64-i64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 7
-; RV64-i64-NEXT: fcvt.l.s a1, fa4
-; RV64-i64-NEXT: vfmv.f.s fa4, v12
-; RV64-i64-NEXT: fcvt.l.s a2, fa5
-; RV64-i64-NEXT: sd a2, 232(sp)
-; RV64-i64-NEXT: flw fa5, 112(sp)
-; RV64-i64-NEXT: fcvt.l.s a2, fa4
-; RV64-i64-NEXT: vfmv.f.s fa4, v13
-; RV64-i64-NEXT: vslidedown.vi v12, v8, 6
-; RV64-i64-NEXT: fcvt.l.s a3, fa5
-; RV64-i64-NEXT: sd a3, 224(sp)
-; RV64-i64-NEXT: flw fa5, 108(sp)
-; RV64-i64-NEXT: fcvt.l.s a3, fa4
-; RV64-i64-NEXT: vfmv.f.s fa4, v10
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 5
-; RV64-i64-NEXT: fcvt.l.s a4, fa5
-; RV64-i64-NEXT: sd a4, 216(sp)
-; RV64-i64-NEXT: flw fa5, 104(sp)
-; RV64-i64-NEXT: fcvt.l.s a4, fa4
-; RV64-i64-NEXT: vfmv.f.s fa4, v12
-; RV64-i64-NEXT: fcvt.l.s a5, fa4
-; RV64-i64-NEXT: fcvt.l.s a6, fa5
-; RV64-i64-NEXT: sd a6, 208(sp)
-; RV64-i64-NEXT: flw fa5, 100(sp)
-; RV64-i64-NEXT: vfmv.f.s fa4, v10
-; RV64-i64-NEXT: fcvt.l.s a6, fa4
-; RV64-i64-NEXT: vslidedown.vi v8, v8, 4
-; RV64-i64-NEXT: fcvt.l.s a7, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: sd a7, 200(sp)
-; RV64-i64-NEXT: fcvt.l.s a7, fa5
-; RV64-i64-NEXT: flw fa5, 96(sp)
-; RV64-i64-NEXT: sd a0, 128(sp)
-; RV64-i64-NEXT: sd a3, 136(sp)
-; RV64-i64-NEXT: sd a2, 144(sp)
-; RV64-i64-NEXT: sd a1, 152(sp)
-; RV64-i64-NEXT: sd a7, 160(sp)
-; RV64-i64-NEXT: sd a6, 168(sp)
-; RV64-i64-NEXT: sd a5, 176(sp)
-; RV64-i64-NEXT: sd a4, 184(sp)
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 192(sp)
-; RV64-i64-NEXT: addi a0, sp, 128
-; RV64-i64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-i64-NEXT: vle64.v v8, (a0)
-; RV64-i64-NEXT: addi sp, s0, -384
-; RV64-i64-NEXT: .cfi_def_cfa sp, 384
-; RV64-i64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
-; RV64-i64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
-; RV64-i64-NEXT: .cfi_restore ra
-; RV64-i64-NEXT: .cfi_restore s0
-; RV64-i64-NEXT: addi sp, sp, 384
-; RV64-i64-NEXT: .cfi_def_cfa_offset 0
+; RV64-i64-NEXT: vmv4r.v v16, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v16
; RV64-i64-NEXT: ret
%a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
ret <16 x iXLen> %a
@@ -610,26 +153,22 @@ declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
; RV32-LABEL: lrint_v1f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.w.d a0, fa5
-; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vfncvt.x.f.w v9, v8
+; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-i32-LABEL: lrint_v1f64:
; RV64-i32: # %bb.0:
-; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: fcvt.l.d a0, fa5
-; RV64-i32-NEXT: vmv.s.x v8, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-i32-NEXT: vfncvt.x.f.w v9, v8
+; RV64-i32-NEXT: vmv1r.v v8, v9
; RV64-i32-NEXT: ret
;
; RV64-i64-LABEL: lrint_v1f64:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: vmv.s.x v8, a0
+; RV64-i64-NEXT: vfcvt.x.f.v v8, v8
; RV64-i64-NEXT: ret
%a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
ret <1 x iXLen> %a
@@ -639,40 +178,22 @@ declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
; RV32-LABEL: lrint_v2f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.w.d a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v9
-; RV32-NEXT: fcvt.w.d a1, fa5
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vfncvt.x.f.w v9, v8
+; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-i32-LABEL: lrint_v2f64:
; RV64-i32: # %bb.0:
-; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v9, v8, 1
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: fcvt.l.d a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v9
-; RV64-i32-NEXT: fcvt.l.d a1, fa5
; RV64-i32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-i32-NEXT: vmv.v.x v8, a0
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a1
+; RV64-i32-NEXT: vfncvt.x.f.w v9, v8
+; RV64-i32-NEXT: vmv1r.v v8, v9
; RV64-i32-NEXT: ret
;
; RV64-i64-LABEL: lrint_v2f64:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v9
-; RV64-i64-NEXT: fcvt.l.d a1, fa5
-; RV64-i64-NEXT: vmv.v.x v8, a0
-; RV64-i64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-i64-NEXT: vfcvt.x.f.v v8, v8
; RV64-i64-NEXT: ret
%a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x)
ret <2 x iXLen> %a
@@ -682,72 +203,22 @@ declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; RV32-LABEL: lrint_v4f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: fcvt.w.d a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v12
-; RV32-NEXT: fcvt.w.d a1, fa5
-; RV32-NEXT: vfmv.f.s fa5, v10
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v9, a0
-; RV32-NEXT: fcvt.w.d a0, fa5
-; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v9, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: fcvt.w.d a0, fa5
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vfncvt.x.f.w v10, v8
+; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-i32-LABEL: lrint_v4f64:
; RV64-i32: # %bb.0:
-; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v12, v8, 1
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v10, v8, 2
-; RV64-i32-NEXT: vslidedown.vi v8, v8, 3
-; RV64-i32-NEXT: fcvt.l.d a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v12
-; RV64-i32-NEXT: fcvt.l.d a1, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v10
; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-i32-NEXT: vmv.v.x v9, a0
-; RV64-i32-NEXT: fcvt.l.d a0, fa5
-; RV64-i32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64-i32-NEXT: vslide1down.vx v8, v9, a1
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
-; RV64-i32-NEXT: fcvt.l.d a0, fa5
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
+; RV64-i32-NEXT: vfncvt.x.f.w v10, v8
+; RV64-i32-NEXT: vmv.v.v v8, v10
; RV64-i32-NEXT: ret
;
; RV64-i64-LABEL: lrint_v4f64:
; RV64-i64: # %bb.0:
-; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v12, v8, 1
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-i64-NEXT: vslidedown.vi v8, v8, 3
-; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v12
-; RV64-i64-NEXT: fcvt.l.d a1, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v10
; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-i64-NEXT: vmv.v.x v10, a0
-; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: vslide1down.vx v8, v10, a1
-; RV64-i64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-i64-NEXT: vfcvt.x.f.v v8, v8
; RV64-i64-NEXT: ret
%a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x)
ret <4 x iXLen> %a
@@ -757,168 +228,22 @@ declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; RV32-LABEL: lrint_v8f64:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -128
-; RV32-NEXT: .cfi_def_cfa_offset 128
-; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: .cfi_offset s0, -8
-; RV32-NEXT: addi s0, sp, 128
-; RV32-NEXT: .cfi_def_cfa s0, 0
-; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v14, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 2
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vse64.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vfmv.f.s fa4, v14
-; RV32-NEXT: fcvt.w.d a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v12
-; RV32-NEXT: vfmv.f.s fa3, v8
-; RV32-NEXT: fcvt.w.d a1, fa4
-; RV32-NEXT: fcvt.w.d a2, fa5
-; RV32-NEXT: fcvt.w.d a3, fa3
-; RV32-NEXT: fld fa5, 32(sp)
-; RV32-NEXT: fld fa4, 40(sp)
-; RV32-NEXT: fld fa3, 48(sp)
-; RV32-NEXT: fld fa2, 56(sp)
-; RV32-NEXT: fcvt.w.d a4, fa5
-; RV32-NEXT: fcvt.w.d a5, fa4
-; RV32-NEXT: fcvt.w.d a6, fa3
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: vslide1down.vx v8, v8, a4
-; RV32-NEXT: vslide1down.vx v8, v8, a5
-; RV32-NEXT: vslide1down.vx v8, v8, a6
-; RV32-NEXT: fcvt.w.d a0, fa2
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: addi sp, s0, -128
-; RV32-NEXT: .cfi_def_cfa sp, 128
-; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: .cfi_restore s0
-; RV32-NEXT: addi sp, sp, 128
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vfncvt.x.f.w v12, v8
+; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV64-i32-LABEL: lrint_v8f64:
; RV64-i32: # %bb.0:
-; RV64-i32-NEXT: addi sp, sp, -128
-; RV64-i32-NEXT: .cfi_def_cfa_offset 128
-; RV64-i32-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
-; RV64-i32-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
-; RV64-i32-NEXT: .cfi_offset ra, -8
-; RV64-i32-NEXT: .cfi_offset s0, -16
-; RV64-i32-NEXT: addi s0, sp, 128
-; RV64-i32-NEXT: .cfi_def_cfa s0, 0
-; RV64-i32-NEXT: andi sp, sp, -64
-; RV64-i32-NEXT: mv a0, sp
-; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v14, v8, 1
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v12, v8, 2
-; RV64-i32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-i32-NEXT: vse64.v v8, (a0)
-; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v8, v8, 3
-; RV64-i32-NEXT: vfmv.f.s fa4, v14
-; RV64-i32-NEXT: fcvt.l.d a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v12
-; RV64-i32-NEXT: vfmv.f.s fa3, v8
-; RV64-i32-NEXT: fcvt.l.d a1, fa4
-; RV64-i32-NEXT: fcvt.l.d a2, fa5
-; RV64-i32-NEXT: fcvt.l.d a3, fa3
-; RV64-i32-NEXT: fld fa5, 32(sp)
-; RV64-i32-NEXT: fld fa4, 40(sp)
-; RV64-i32-NEXT: fld fa3, 48(sp)
-; RV64-i32-NEXT: fld fa2, 56(sp)
-; RV64-i32-NEXT: fcvt.l.d a4, fa5
-; RV64-i32-NEXT: fcvt.l.d a5, fa4
-; RV64-i32-NEXT: fcvt.l.d a6, fa3
; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-i32-NEXT: vmv.v.x v8, a0
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a1
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a2
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a3
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a4
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a5
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a6
-; RV64-i32-NEXT: fcvt.l.d a0, fa2
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
-; RV64-i32-NEXT: addi sp, s0, -128
-; RV64-i32-NEXT: .cfi_def_cfa sp, 128
-; RV64-i32-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
-; RV64-i32-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
-; RV64-i32-NEXT: .cfi_restore ra
-; RV64-i32-NEXT: .cfi_restore s0
-; RV64-i32-NEXT: addi sp, sp, 128
-; RV64-i32-NEXT: .cfi_def_cfa_offset 0
+; RV64-i32-NEXT: vfncvt.x.f.w v12, v8
+; RV64-i32-NEXT: vmv.v.v v8, v12
; RV64-i32-NEXT: ret
;
; RV64-i64-LABEL: lrint_v8f64:
; RV64-i64: # %bb.0:
-; RV64-i64-NEXT: addi sp, sp, -192
-; RV64-i64-NEXT: .cfi_def_cfa_offset 192
-; RV64-i64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
-; RV64-i64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
-; RV64-i64-NEXT: .cfi_offset ra, -8
-; RV64-i64-NEXT: .cfi_offset s0, -16
-; RV64-i64-NEXT: addi s0, sp, 192
-; RV64-i64-NEXT: .cfi_def_cfa s0, 0
-; RV64-i64-NEXT: andi sp, sp, -64
-; RV64-i64-NEXT: mv a0, sp
-; RV64-i64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-i64-NEXT: vse64.v v8, (a0)
-; RV64-i64-NEXT: fld fa5, 56(sp)
-; RV64-i64-NEXT: vfmv.f.s fa4, v8
-; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 1
-; RV64-i64-NEXT: fcvt.l.d a0, fa4
-; RV64-i64-NEXT: fcvt.l.d a1, fa5
-; RV64-i64-NEXT: sd a1, 120(sp)
-; RV64-i64-NEXT: fld fa5, 48(sp)
-; RV64-i64-NEXT: vfmv.f.s fa4, v10
-; RV64-i64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 3
-; RV64-i64-NEXT: fcvt.l.d a1, fa4
-; RV64-i64-NEXT: fcvt.l.d a2, fa5
-; RV64-i64-NEXT: sd a2, 112(sp)
-; RV64-i64-NEXT: fld fa5, 40(sp)
-; RV64-i64-NEXT: vfmv.f.s fa4, v10
-; RV64-i64-NEXT: fcvt.l.d a2, fa4
-; RV64-i64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-i64-NEXT: fcvt.l.d a3, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: sd a3, 104(sp)
-; RV64-i64-NEXT: fcvt.l.d a3, fa5
-; RV64-i64-NEXT: fld fa5, 32(sp)
-; RV64-i64-NEXT: sd a0, 64(sp)
-; RV64-i64-NEXT: sd a1, 72(sp)
-; RV64-i64-NEXT: sd a3, 80(sp)
-; RV64-i64-NEXT: sd a2, 88(sp)
-; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: sd a0, 96(sp)
-; RV64-i64-NEXT: addi a0, sp, 64
; RV64-i64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-i64-NEXT: vle64.v v8, (a0)
-; RV64-i64-NEXT: addi sp, s0, -192
-; RV64-i64-NEXT: .cfi_def_cfa sp, 192
-; RV64-i64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
-; RV64-i64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
-; RV64-i64-NEXT: .cfi_restore ra
-; RV64-i64-NEXT: .cfi_restore s0
-; RV64-i64-NEXT: addi sp, sp, 192
-; RV64-i64-NEXT: .cfi_def_cfa_offset 0
+; RV64-i64-NEXT: vfcvt.x.f.v v8, v8
; RV64-i64-NEXT: ret
%a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
ret <8 x iXLen> %a