[llvm] 5e1d81a - LegalizeIntegerTypes: implement PromoteIntRes for xrint (#71055)
via llvm-commits
llvm-commits@lists.llvm.org
Thu Nov 2 08:54:00 PDT 2023
Author: Ramkumar Ramachandra
Date: 2023-11-02T15:53:56Z
New Revision: 5e1d81ac680482eef401f944bccb6cf03bc7115f
URL: https://github.com/llvm/llvm-project/commit/5e1d81ac680482eef401f944bccb6cf03bc7115f
DIFF: https://github.com/llvm/llvm-project/commit/5e1d81ac680482eef401f944bccb6cf03bc7115f.diff
LOG: LegalizeIntegerTypes: implement PromoteIntRes for xrint (#71055)
Recently, 98c90a1 (ISel: introduce vector ISD::LRINT, ISD::LLRINT;
custom RISCV lowering) introduced vector variants of llvm.lrint,
llvm.llrint, and bundled several tests along with the code change.
However, it forgot to test lrint and llrint on fixed vectors on RISC-V,
and it turns out that fixed-vectors-lrint.ll requires
PromoteIntRes_XRINT to be implemented. Implement it, and add tests for
fixed-vector lrint and llrint.
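For illustration only (not part of the patch), here is a reduced IR example
in the shape of the new tests; the function name is made up, and whether the
integer result actually needs promotion depends on the target's legal types:

  declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)

  define <2 x i64> @llrint_sketch(<2 x float> %x) {
    ; Lowered to a vector ISD::LLRINT node. If its integer result type is
    ; illegal, the type legalizer now promotes it via PromoteIntRes_XRINT,
    ; which rebuilds the node with the same opcode on the promoted type.
    %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
    ret <2 x i64> %a
  }

The new hook mirrors PromoteIntRes_FP_TO_FP16_BF16 directly above it in the
diff: compute the promoted type with getTypeToTransformTo and re-emit the
node with N->getOpcode() on that type.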
Added:
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 2d2585b3db73226..39d3b1ce59b02b1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -301,6 +301,11 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FFREXP:
Res = PromoteIntRes_FFREXP(N);
break;
+
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ Res = PromoteIntRes_XRINT(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -783,6 +788,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16_BF16(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_XRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_GET_ROUNDING(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index ee4278ceb729b61..c48d6c4adf61517 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -326,6 +326,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N);
SDValue PromoteIntRes_FP_TO_FP16_BF16(SDNode *N);
+ SDValue PromoteIntRes_XRINT(SDNode *N);
SDValue PromoteIntRes_FREEZE(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
new file mode 100644
index 000000000000000..ff83e7c8c32ae9b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
@@ -0,0 +1,978 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
+
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+; RV32-LABEL: llrint_v1i64_v1f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v1i64_v1f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v8, a0
+; RV64-NEXT: ret
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
+
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+; RV32-LABEL: llrint_v2i64_v2f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v2i64_v2f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vslide1down.vx v9, v8, a0
+; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vslide1down.vx v8, v9, a0
+; RV64-NEXT: ret
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
+
+define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) {
+; RV32-LABEL: llrint_v3i64_v3f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v3i64_v3f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v8, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v8, v10, a0
+; RV64-NEXT: ret
+ %a = call <3 x i64> @llvm.llrint.v3i64.v3f32(<3 x float> %x)
+ ret <3 x i64> %a
+}
+declare <3 x i64> @llvm.llrint.v3i64.v3f32(<3 x float>)
+
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+; RV32-LABEL: llrint_v4i64_v4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v4i64_v4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v8, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v8, v10, a0
+; RV64-NEXT: ret
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
+
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+; RV32-LABEL: llrint_v8i64_v8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -208
+; RV32-NEXT: .cfi_def_cfa_offset 208
+; RV32-NEXT: sw ra, 204(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 200(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 208
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 68(sp)
+; RV32-NEXT: sw a0, 64(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 7
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 124(sp)
+; RV32-NEXT: sw a0, 120(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 6
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 116(sp)
+; RV32-NEXT: sw a0, 112(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 5
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 108(sp)
+; RV32-NEXT: sw a0, 104(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 4
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 100(sp)
+; RV32-NEXT: sw a0, 96(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 92(sp)
+; RV32-NEXT: sw a0, 88(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 84(sp)
+; RV32-NEXT: sw a0, 80(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 76(sp)
+; RV32-NEXT: sw a0, 72(sp)
+; RV32-NEXT: addi a0, sp, 64
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: addi sp, s0, -208
+; RV32-NEXT: lw ra, 204(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 200(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 208
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v8i64_v8f32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -128
+; RV64-NEXT: .cfi_def_cfa_offset 128
+; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 128
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -64
+; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 7
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 56(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 6
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 48(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 5
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 40(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 4
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 32(sp)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 3
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 24(sp)
+; RV64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 16(sp)
+; RV64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: mv a0, sp
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi sp, s0, -128
+; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 128
+; RV64-NEXT: ret
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
+
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+; RV32-LABEL: llrint_v16i64_v16f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -400
+; RV32-NEXT: .cfi_def_cfa_offset 400
+; RV32-NEXT: sw ra, 396(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 392(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 400
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: andi sp, sp, -128
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: addi a0, sp, 64
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: flw fa0, 124(sp)
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 252(sp)
+; RV32-NEXT: sw a0, 248(sp)
+; RV32-NEXT: flw fa0, 120(sp)
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 244(sp)
+; RV32-NEXT: sw a0, 240(sp)
+; RV32-NEXT: flw fa0, 116(sp)
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 236(sp)
+; RV32-NEXT: sw a0, 232(sp)
+; RV32-NEXT: flw fa0, 112(sp)
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 228(sp)
+; RV32-NEXT: sw a0, 224(sp)
+; RV32-NEXT: flw fa0, 108(sp)
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 220(sp)
+; RV32-NEXT: sw a0, 216(sp)
+; RV32-NEXT: flw fa0, 104(sp)
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 212(sp)
+; RV32-NEXT: sw a0, 208(sp)
+; RV32-NEXT: flw fa0, 100(sp)
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 204(sp)
+; RV32-NEXT: sw a0, 200(sp)
+; RV32-NEXT: flw fa0, 96(sp)
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 196(sp)
+; RV32-NEXT: sw a0, 192(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 132(sp)
+; RV32-NEXT: sw a0, 128(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 156(sp)
+; RV32-NEXT: sw a0, 152(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 148(sp)
+; RV32-NEXT: sw a0, 144(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 140(sp)
+; RV32-NEXT: sw a0, 136(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 7
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 188(sp)
+; RV32-NEXT: sw a0, 184(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 6
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 180(sp)
+; RV32-NEXT: sw a0, 176(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 5
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 172(sp)
+; RV32-NEXT: sw a0, 168(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 4
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf@plt
+; RV32-NEXT: sw a1, 164(sp)
+; RV32-NEXT: sw a0, 160(sp)
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: addi a1, sp, 128
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vle32.v v8, (a1)
+; RV32-NEXT: addi sp, s0, -400
+; RV32-NEXT: lw ra, 396(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 392(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 400
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v16i64_v16f32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -384
+; RV64-NEXT: .cfi_def_cfa_offset 384
+; RV64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 384
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -128
+; RV64-NEXT: addi a0, sp, 64
+; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV64-NEXT: vse32.v v8, (a0)
+; RV64-NEXT: flw fa5, 124(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 248(sp)
+; RV64-NEXT: flw fa5, 120(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 240(sp)
+; RV64-NEXT: flw fa5, 116(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 232(sp)
+; RV64-NEXT: flw fa5, 112(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 224(sp)
+; RV64-NEXT: flw fa5, 108(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 216(sp)
+; RV64-NEXT: flw fa5, 104(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 208(sp)
+; RV64-NEXT: flw fa5, 100(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 200(sp)
+; RV64-NEXT: flw fa5, 96(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 192(sp)
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 128(sp)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 3
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 152(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 2
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 144(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 136(sp)
+; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 7
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 184(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 6
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 176(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 5
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 168(sp)
+; RV64-NEXT: vslidedown.vi v8, v8, 4
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 160(sp)
+; RV64-NEXT: addi a0, sp, 128
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi sp, s0, -384
+; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 384
+; RV64-NEXT: ret
+ %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
+
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
+; RV32-LABEL: llrint_v1i64_v1f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v1i64_v1f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: vmv.s.x v8, a0
+; RV64-NEXT: ret
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
+
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
+; RV32-LABEL: llrint_v2i64_v2f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v2i64_v2f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: vslide1down.vx v9, v8, a0
+; RV64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: vslide1down.vx v8, v9, a0
+; RV64-NEXT: ret
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
+
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
+; RV32-LABEL: llrint_v4i64_v4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v4i64_v4f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: vslide1down.vx v10, v8, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v12, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v12
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v12, v8, 2
+; RV64-NEXT: vfmv.f.s fa5, v12
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v8, v10, a0
+; RV64-NEXT: ret
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
+
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+; RV32-LABEL: llrint_v8i64_v8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -272
+; RV32-NEXT: .cfi_def_cfa_offset 272
+; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 272
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: addi a0, sp, 256
+; RV32-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: addi a0, sp, 64
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: fld fa0, 120(sp)
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: sw a1, 188(sp)
+; RV32-NEXT: sw a0, 184(sp)
+; RV32-NEXT: fld fa0, 112(sp)
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: sw a1, 180(sp)
+; RV32-NEXT: sw a0, 176(sp)
+; RV32-NEXT: fld fa0, 104(sp)
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: sw a1, 172(sp)
+; RV32-NEXT: sw a0, 168(sp)
+; RV32-NEXT: fld fa0, 96(sp)
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: sw a1, 164(sp)
+; RV32-NEXT: sw a0, 160(sp)
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 256
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: sw a1, 132(sp)
+; RV32-NEXT: sw a0, 128(sp)
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 256
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: sw a1, 140(sp)
+; RV32-NEXT: sw a0, 136(sp)
+; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 256
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: sw a1, 156(sp)
+; RV32-NEXT: sw a0, 152(sp)
+; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 256
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint@plt
+; RV32-NEXT: sw a1, 148(sp)
+; RV32-NEXT: sw a0, 144(sp)
+; RV32-NEXT: addi a0, sp, 128
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: addi sp, s0, -272
+; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 272
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v8i64_v8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -192
+; RV64-NEXT: .cfi_def_cfa_offset 192
+; RV64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 192
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -64
+; RV64-NEXT: mv a0, sp
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: fld fa5, 56(sp)
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 120(sp)
+; RV64-NEXT: fld fa5, 48(sp)
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 112(sp)
+; RV64-NEXT: fld fa5, 40(sp)
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 104(sp)
+; RV64-NEXT: fld fa5, 32(sp)
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 96(sp)
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 64(sp)
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 72(sp)
+; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 3
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 88(sp)
+; RV64-NEXT: vslidedown.vi v8, v8, 2
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 80(sp)
+; RV64-NEXT: addi a0, sp, 64
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi sp, s0, -192
+; RV64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 192
+; RV64-NEXT: ret
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
new file mode 100644
index 000000000000000..cd4eec44920c951
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
@@ -0,0 +1,714 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+f,+d \
+; RUN: -target-abi=ilp32d -verify-machineinstrs | FileCheck %s --check-prefix=RV32
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \
+; RUN: -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \
+; RUN: -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i64
+
+define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
+; RV32-LABEL: lrint_v1f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v1f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vmv.s.x v8, a0
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v1f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-i64-NEXT: vmv.s.x v8, a0
+; RV64-i64-NEXT: ret
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x)
+ ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)
+
+define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
+; RV32-LABEL: lrint_v2f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vslide1down.vx v9, v8, a0
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vslide1down.vx v8, v9, a0
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v2f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vslide1down.vx v9, v8, a0
+; RV64-i32-NEXT: vslidedown.vi v8, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vslide1down.vx v8, v9, a0
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v2f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-i64-NEXT: vslide1down.vx v9, v8, a0
+; RV64-i64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-i64-NEXT: vslide1down.vx v8, v9, a0
+; RV64-i64-NEXT: ret
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
+ ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)
+
+define <3 x iXLen> @lrint_v3f32(<3 x float> %x) {
+; RV32-LABEL: lrint_v3f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vslide1down.vx v9, v8, a0
+; RV32-NEXT: vslidedown.vi v10, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v10
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vslide1down.vx v9, v9, a0
+; RV32-NEXT: vslidedown.vi v10, v8, 2
+; RV32-NEXT: vfmv.f.s fa5, v10
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vslide1down.vx v9, v9, a0
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vslide1down.vx v8, v9, a0
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v3f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vslide1down.vx v9, v8, a0
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vslide1down.vx v9, v9, a0
+; RV64-i32-NEXT: vslidedown.vi v8, v8, 2
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vslide1down.vx v8, v9, a0
+; RV64-i32-NEXT: vslidedown.vi v8, v8, 1
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v3f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vslide1down.vx v10, v8, a0
+; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-i64-NEXT: vfmv.f.s fa5, v9
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-i64-NEXT: vfmv.f.s fa5, v9
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vslide1down.vx v8, v10, a0
+; RV64-i64-NEXT: ret
+ %a = call <3 x iXLen> @llvm.lrint.v3iXLen.v3f32(<3 x float> %x)
+ ret <3 x iXLen> %a
+}
+declare <3 x iXLen> @llvm.lrint.v3iXLen.v3f32(<3 x float>)
+
+define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
+; RV32-LABEL: lrint_v4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vslide1down.vx v9, v8, a0
+; RV32-NEXT: vslidedown.vi v10, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v10
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vslide1down.vx v9, v9, a0
+; RV32-NEXT: vslidedown.vi v10, v8, 2
+; RV32-NEXT: vfmv.f.s fa5, v10
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vslide1down.vx v9, v9, a0
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vslide1down.vx v8, v9, a0
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v4f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vslide1down.vx v9, v8, a0
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vslide1down.vx v9, v9, a0
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 2
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vslide1down.vx v9, v9, a0
+; RV64-i32-NEXT: vslidedown.vi v8, v8, 3
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vslide1down.vx v8, v9, a0
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v4f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vslide1down.vx v10, v8, a0
+; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-i64-NEXT: vfmv.f.s fa5, v9
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-i64-NEXT: vfmv.f.s fa5, v9
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vslide1down.vx v8, v10, a0
+; RV64-i64-NEXT: ret
+ %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x)
+ ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)
+
+define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
+; RV32-LABEL: lrint_v8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vslide1down.vx v10, v8, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v12
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 2
+; RV32-NEXT: vfmv.f.s fa5, v12
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 3
+; RV32-NEXT: vfmv.f.s fa5, v12
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 4
+; RV32-NEXT: vfmv.f.s fa5, v12
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 5
+; RV32-NEXT: vfmv.f.s fa5, v12
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 6
+; RV32-NEXT: vfmv.f.s fa5, v12
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 7
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v10, a0
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v8f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vslide1down.vx v10, v8, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v12, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v12
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v12, v8, 2
+; RV64-i32-NEXT: vfmv.f.s fa5, v12
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v12, v8, 3
+; RV64-i32-NEXT: vfmv.f.s fa5, v12
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v12, v8, 4
+; RV64-i32-NEXT: vfmv.f.s fa5, v12
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v12, v8, 5
+; RV64-i32-NEXT: vfmv.f.s fa5, v12
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v12, v8, 6
+; RV64-i32-NEXT: vfmv.f.s fa5, v12
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v8, v8, 7
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v8, v10, a0
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v8f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: addi sp, sp, -128
+; RV64-i64-NEXT: .cfi_def_cfa_offset 128
+; RV64-i64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; RV64-i64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; RV64-i64-NEXT: .cfi_offset ra, -8
+; RV64-i64-NEXT: .cfi_offset s0, -16
+; RV64-i64-NEXT: addi s0, sp, 128
+; RV64-i64-NEXT: .cfi_def_cfa s0, 0
+; RV64-i64-NEXT: andi sp, sp, -64
+; RV64-i64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 0(sp)
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 7
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 56(sp)
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 6
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 48(sp)
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 5
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 40(sp)
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 4
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 32(sp)
+; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v9, v8, 3
+; RV64-i64-NEXT: vfmv.f.s fa5, v9
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 24(sp)
+; RV64-i64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-i64-NEXT: vfmv.f.s fa5, v9
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 16(sp)
+; RV64-i64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 8(sp)
+; RV64-i64-NEXT: mv a0, sp
+; RV64-i64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-i64-NEXT: vle64.v v8, (a0)
+; RV64-i64-NEXT: addi sp, s0, -128
+; RV64-i64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; RV64-i64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; RV64-i64-NEXT: addi sp, sp, 128
+; RV64-i64-NEXT: ret
+ %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x)
+ ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)
+
+define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) {
+ %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
+ ret <16 x iXLen> %a
+}
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
+
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
+; RV32-LABEL: lrint_v1f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v1f64:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-i32-NEXT: vmv.s.x v8, a0
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v1f64:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: vmv.s.x v8, a0
+; RV64-i64-NEXT: ret
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
+ ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
+
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
+; RV32-LABEL: lrint_v2f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v9, v8, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v9, a0
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v2f64:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v9, v8, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v8, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v8, v9, a0
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v2f64:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: vslide1down.vx v9, v8, a0
+; RV64-i64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: vslide1down.vx v8, v9, a0
+; RV64-i64-NEXT: ret
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x)
+ ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
+
+define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
+; RV32-LABEL: lrint_v4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v8, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v11, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v11
+; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 2
+; RV32-NEXT: vfmv.f.s fa5, v12
+; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v10, a0
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v4f64:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v10, v8, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v11, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v11
+; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v12, v8, 2
+; RV64-i32-NEXT: vfmv.f.s fa5, v12
+; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v8, v8, 3
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v8, v10, a0
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v4f64:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: vslide1down.vx v10, v8, a0
+; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v12, v8, 1
+; RV64-i64-NEXT: vfmv.f.s fa5, v12
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v12, v8, 2
+; RV64-i64-NEXT: vfmv.f.s fa5, v12
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vslide1down.vx v8, v10, a0
+; RV64-i64-NEXT: ret
+ %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x)
+ ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
+
+define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
+; RV32-LABEL: lrint_v8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -128
+; RV32-NEXT: .cfi_def_cfa_offset 128
+; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 128
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: mv a0, sp
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v8, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v12
+; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 2
+; RV32-NEXT: vfmv.f.s fa5, v12
+; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: fld fa4, 32(sp)
+; RV32-NEXT: fld fa3, 40(sp)
+; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: fld fa5, 48(sp)
+; RV32-NEXT: fcvt.w.d a1, fa4
+; RV32-NEXT: fcvt.w.d a2, fa3
+; RV32-NEXT: vslide1down.vx v8, v10, a0
+; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: fld fa5, 56(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: addi sp, s0, -128
+; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 128
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v8f64:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: addi sp, sp, -128
+; RV64-i32-NEXT: .cfi_def_cfa_offset 128
+; RV64-i32-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; RV64-i32-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; RV64-i32-NEXT: .cfi_offset ra, -8
+; RV64-i32-NEXT: .cfi_offset s0, -16
+; RV64-i32-NEXT: addi s0, sp, 128
+; RV64-i32-NEXT: .cfi_def_cfa s0, 0
+; RV64-i32-NEXT: andi sp, sp, -64
+; RV64-i32-NEXT: mv a0, sp
+; RV64-i32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-i32-NEXT: vse64.v v8, (a0)
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v10, v8, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v12, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v12
+; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v12, v8, 2
+; RV64-i32-NEXT: vfmv.f.s fa5, v12
+; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
+; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v8, v8, 3
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i32-NEXT: fld fa4, 32(sp)
+; RV64-i32-NEXT: fld fa3, 40(sp)
+; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: fld fa5, 48(sp)
+; RV64-i32-NEXT: fcvt.l.d a1, fa4
+; RV64-i32-NEXT: fcvt.l.d a2, fa3
+; RV64-i32-NEXT: vslide1down.vx v8, v10, a0
+; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: fld fa5, 56(sp)
+; RV64-i32-NEXT: vslide1down.vx v8, v8, a1
+; RV64-i32-NEXT: vslide1down.vx v8, v8, a2
+; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
+; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
+; RV64-i32-NEXT: addi sp, s0, -128
+; RV64-i32-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; RV64-i32-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; RV64-i32-NEXT: addi sp, sp, 128
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v8f64:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: addi sp, sp, -192
+; RV64-i64-NEXT: .cfi_def_cfa_offset 192
+; RV64-i64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
+; RV64-i64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
+; RV64-i64-NEXT: .cfi_offset ra, -8
+; RV64-i64-NEXT: .cfi_offset s0, -16
+; RV64-i64-NEXT: addi s0, sp, 192
+; RV64-i64-NEXT: .cfi_def_cfa s0, 0
+; RV64-i64-NEXT: andi sp, sp, -64
+; RV64-i64-NEXT: mv a0, sp
+; RV64-i64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-i64-NEXT: vse64.v v8, (a0)
+; RV64-i64-NEXT: fld fa5, 56(sp)
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: sd a0, 120(sp)
+; RV64-i64-NEXT: fld fa5, 48(sp)
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: sd a0, 112(sp)
+; RV64-i64-NEXT: fld fa5, 40(sp)
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: sd a0, 104(sp)
+; RV64-i64-NEXT: fld fa5, 32(sp)
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: sd a0, 96(sp)
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: sd a0, 64(sp)
+; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 1
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: sd a0, 72(sp)
+; RV64-i64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 3
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: sd a0, 88(sp)
+; RV64-i64-NEXT: vslidedown.vi v8, v8, 2
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: sd a0, 80(sp)
+; RV64-i64-NEXT: addi a0, sp, 64
+; RV64-i64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-i64-NEXT: vle64.v v8, (a0)
+; RV64-i64-NEXT: addi sp, s0, -192
+; RV64-i64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
+; RV64-i64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
+; RV64-i64-NEXT: addi sp, sp, 192
+; RV64-i64-NEXT: ret
+ %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
+ ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)