[llvm] [VP][RISCV] Introduce vp.lrint/llrint and RISC-V support. (PR #82627)
Yeting Kuo via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 22 06:56:45 PST 2024
https://github.com/yetingk created https://github.com/llvm/llvm-project/pull/82627
RISC-V implements vector lrint/llrint by vfcvt.x.f.v.
From 3af6b131e71c27aa8c069fde06deb264fad094d1 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Thu, 22 Feb 2024 14:07:53 +0800
Subject: [PATCH] [VP][RISCV] Introduce vp.lrint/llrint and RISC-V support.
RISC-V implements vector lrint/llrint by vfcvt.x.f.v.
---
llvm/docs/LangRef.rst | 99 ++
llvm/include/llvm/IR/Intrinsics.td | 8 +
llvm/include/llvm/IR/VPIntrinsics.def | 12 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 14 +-
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 +-
.../RISCV/rvv/fixed-vectors-llrint-vp.ll | 987 ++++++++++++++++++
.../RISCV/rvv/fixed-vectors-lrint-vp.ll | 233 +++++
llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll | 110 ++
llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll | 209 ++++
9 files changed, 1678 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index fd2e3aacd0169c..5385057ba657c0 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15990,6 +15990,8 @@ functions would, but without setting errno. If the rounded value is
too large to be stored in the result type, the return value is a
non-deterministic value (equivalent to `freeze poison`).
+.. _int_lrint:
+
'``llvm.lrint.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -16035,6 +16037,8 @@ would, but without setting errno. If the rounded value is too large to
be stored in the result type, the return value is a non-deterministic
value (equivalent to `freeze poison`).
+.. _int_llrint:
+
'``llvm.llrint.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -23344,6 +23348,101 @@ Examples:
%t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison
+.. _int_vp_lrint:
+
+'``llvm.vp.lrint.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare <16 x i32> @llvm.vp.lrint.v16i32.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
+ declare <vscale x 4 x i32> @llvm.vp.lrint.nxv4i32.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+ declare <256 x i64> @llvm.vp.lrint.v256i64.v256f64 (<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated lrint of a vector of floating-point values.
+
+
+Arguments:
+""""""""""
+
+The result is an integer vector; the first operand is a floating-point vector that has
+the same number of elements as the result vector type. The second operand is the
+vector mask and has the same number of elements as the result vector type. The
+third operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.lrint``' intrinsic performs lrint (:ref:`lrint <int_lrint>`) of
+the first vector operand on each enabled lane. The result on disabled lanes is a
+:ref:`poison value <poisonvalues>`.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %r = call <4 x i32> @llvm.vp.lrint.v4i32.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
+ ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+ %t = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> %a)
+ %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+.. _int_vp_llrint:
+
+'``llvm.vp.llrint.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare <16 x i32> @llvm.vp.llrint.v16i32.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
+ declare <vscale x 4 x i32> @llvm.vp.llrint.nxv4i32.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+ declare <256 x i64> @llvm.vp.llrint.v256i64.v256f64 (<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated llrint of a vector of floating-point values.
+
+
+Arguments:
+""""""""""
+
+The result is an integer vector; the first operand is a floating-point vector that has
+the same number of elements as the result vector type. The second operand is the
+vector mask and has the same number of elements as the result vector type. The
+third operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.llrint``' intrinsic performs llrint (:ref:`llrint <int_llrint>`) of
+the first vector operand on each enabled lane. The result on disabled lanes is a
+:ref:`poison value <poisonvalues>`.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %r = call <4 x i32> @llvm.vp.llrint.v4i32.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
+ ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+ %t = call <4 x i32> @llvm.llrint.v4i32.v4f32(<4 x float> %a)
+ %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+
.. _int_vp_bitreverse:
'``llvm.vp.bitreverse.*``' Intrinsics
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 4becdd71cd440d..ab91abacac6a13 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2036,6 +2036,14 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
+ def int_vp_lrint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_llrint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
// Casts
def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 3b32b60609f536..c320f768ce6d82 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -437,6 +437,18 @@ VP_PROPERTY_FUNCTIONAL_INTRINSIC(nearbyint)
VP_PROPERTY_FUNCTIONAL_SDOPC(FNEARBYINT)
END_REGISTER_VP(vp_nearbyint, VP_FNEARBYINT)
+// llvm.vp.lrint(x,mask,vlen)
+BEGIN_REGISTER_VP(vp_lrint, 1, 2, VP_LRINT, 0)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(lrint)
+VP_PROPERTY_FUNCTIONAL_SDOPC(LRINT)
+END_REGISTER_VP(vp_lrint, VP_LRINT)
+
+// llvm.vp.llrint(x,mask,vlen)
+BEGIN_REGISTER_VP(vp_llrint, 1, 2, VP_LLRINT, 0)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(llrint)
+VP_PROPERTY_FUNCTIONAL_SDOPC(LLRINT)
+END_REGISTER_VP(vp_llrint, VP_LLRINT)
+
///// } Floating-Point Arithmetic
///// Type Casts {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7fc252600534f0..87a5ab9cfecb0d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1102,7 +1102,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FRINT:
case ISD::VP_FRINT:
case ISD::LRINT:
+ case ISD::VP_LRINT:
case ISD::LLRINT:
+ case ISD::VP_LLRINT:
case ISD::FROUND:
case ISD::VP_FROUND:
case ISD::FROUNDEVEN:
@@ -4263,6 +4265,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LRINT:
case ISD::LLRINT:
+ case ISD::VP_LRINT:
+ case ISD::VP_LLRINT:
Res = WidenVecRes_XRINT(N);
break;
@@ -4869,7 +4873,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_XRINT(SDNode *N) {
if (WidenNumElts != SrcVT.getVectorElementCount())
return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
- return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, Mask, N->getOperand(2));
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4f809ccf7b3019..e6578eeea2c9fe 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -699,7 +699,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
- ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::EXPERIMENTAL_VP_REVERSE,
+ ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
+ ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
ISD::EXPERIMENTAL_VP_SPLICE};
static const unsigned IntegerVecReduceOps[] = {
@@ -5690,6 +5691,9 @@ static unsigned getRISCVVLOp(SDValue Op) {
switch (Op.getOpcode()) {
default:
llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
+ case ISD::VP_LRINT:
+ case ISD::VP_LLRINT:
+ return RISCVISD::VFCVT_X_F_VL;
OP_CASE(ADD)
OP_CASE(SUB)
OP_CASE(MUL)
@@ -6793,6 +6797,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_UDIV:
case ISD::VP_SREM:
case ISD::VP_UREM:
+ case ISD::VP_LRINT:
+ case ISD::VP_LLRINT:
return lowerVPOp(Op, DAG);
case ISD::VP_AND:
case ISD::VP_OR:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
new file mode 100644
index 00000000000000..119d0d75497c43
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
@@ -0,0 +1,987 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
+
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x, <1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v1i64_v1f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v1i64_v1f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v8, a0
+; RV64-NEXT: ret
+ %a = call <1 x i64> @llvm.vp.llrint.v1i64.v1f32(<1 x float> %x, <1 x i1> %m, i32 %evl)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.vp.llrint.v1i64.v1f32(<1 x float>, <1 x i1>, i32)
+
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v2i64_v2f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v2i64_v2f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a1, fa5
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v8, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: ret
+ %a = call <2 x i64> @llvm.vp.llrint.v2i64.v2f32(<2 x float> %x, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.vp.llrint.v2i64.v2f32(<2 x float>, <2 x i1>, i32)
+
+define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x, <3 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v3i64_v3f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v3i64_v3f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a1, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v8, v10, a0
+; RV64-NEXT: ret
+ %a = call <3 x i64> @llvm.vp.llrint.v3i64.v3f32(<3 x float> %x, <3 x i1> %m, i32 %evl)
+ ret <3 x i64> %a
+}
+declare <3 x i64> @llvm.vp.llrint.v3i64.v3f32(<3 x float>, <3 x i1>, i32)
+
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v4i64_v4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v4i64_v4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a1, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v8, v10, a0
+; RV64-NEXT: ret
+ %a = call <4 x i64> @llvm.vp.llrint.v4i64.v4f32(<4 x float> %x, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.vp.llrint.v4i64.v4f32(<4 x float>, <4 x i1>, i32)
+
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v8i64_v8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -208
+; RV32-NEXT: .cfi_def_cfa_offset 208
+; RV32-NEXT: sw ra, 204(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 200(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 208
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 68(sp)
+; RV32-NEXT: sw a0, 64(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 7
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 124(sp)
+; RV32-NEXT: sw a0, 120(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 6
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 116(sp)
+; RV32-NEXT: sw a0, 112(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 5
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 108(sp)
+; RV32-NEXT: sw a0, 104(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 4
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 100(sp)
+; RV32-NEXT: sw a0, 96(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 92(sp)
+; RV32-NEXT: sw a0, 88(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 84(sp)
+; RV32-NEXT: sw a0, 80(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 192
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 76(sp)
+; RV32-NEXT: sw a0, 72(sp)
+; RV32-NEXT: addi a0, sp, 64
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: addi sp, s0, -208
+; RV32-NEXT: lw ra, 204(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 200(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 208
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v8i64_v8f32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -128
+; RV64-NEXT: .cfi_def_cfa_offset 128
+; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 128
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -64
+; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 7
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 56(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 6
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 48(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 5
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 40(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 4
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 32(sp)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 3
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 24(sp)
+; RV64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 16(sp)
+; RV64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: mv a0, sp
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi sp, s0, -128
+; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 128
+; RV64-NEXT: ret
+ %a = call <8 x i64> @llvm.vp.llrint.v8i64.v8f32(<8 x float> %x, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.vp.llrint.v8i64.v8f32(<8 x float>, <8 x i1>, i32)
+
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v16i64_v16f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -400
+; RV32-NEXT: .cfi_def_cfa_offset 400
+; RV32-NEXT: sw ra, 396(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 392(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 400
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: andi sp, sp, -128
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: addi a0, sp, 64
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: flw fa0, 124(sp)
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 252(sp)
+; RV32-NEXT: sw a0, 248(sp)
+; RV32-NEXT: flw fa0, 120(sp)
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 244(sp)
+; RV32-NEXT: sw a0, 240(sp)
+; RV32-NEXT: flw fa0, 116(sp)
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 236(sp)
+; RV32-NEXT: sw a0, 232(sp)
+; RV32-NEXT: flw fa0, 112(sp)
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 228(sp)
+; RV32-NEXT: sw a0, 224(sp)
+; RV32-NEXT: flw fa0, 108(sp)
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 220(sp)
+; RV32-NEXT: sw a0, 216(sp)
+; RV32-NEXT: flw fa0, 104(sp)
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 212(sp)
+; RV32-NEXT: sw a0, 208(sp)
+; RV32-NEXT: flw fa0, 100(sp)
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 204(sp)
+; RV32-NEXT: sw a0, 200(sp)
+; RV32-NEXT: flw fa0, 96(sp)
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 196(sp)
+; RV32-NEXT: sw a0, 192(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 132(sp)
+; RV32-NEXT: sw a0, 128(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 156(sp)
+; RV32-NEXT: sw a0, 152(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 148(sp)
+; RV32-NEXT: sw a0, 144(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 140(sp)
+; RV32-NEXT: sw a0, 136(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 7
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 188(sp)
+; RV32-NEXT: sw a0, 184(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 6
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 180(sp)
+; RV32-NEXT: sw a0, 176(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 5
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 172(sp)
+; RV32-NEXT: sw a0, 168(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 384
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 4
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: sw a1, 164(sp)
+; RV32-NEXT: sw a0, 160(sp)
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: addi a1, sp, 128
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vle32.v v8, (a1)
+; RV32-NEXT: addi sp, s0, -400
+; RV32-NEXT: lw ra, 396(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 392(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 400
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v16i64_v16f32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -384
+; RV64-NEXT: .cfi_def_cfa_offset 384
+; RV64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 384
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -128
+; RV64-NEXT: addi a0, sp, 64
+; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV64-NEXT: vse32.v v8, (a0)
+; RV64-NEXT: flw fa5, 124(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 248(sp)
+; RV64-NEXT: flw fa5, 120(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 240(sp)
+; RV64-NEXT: flw fa5, 116(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 232(sp)
+; RV64-NEXT: flw fa5, 112(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 224(sp)
+; RV64-NEXT: flw fa5, 108(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 216(sp)
+; RV64-NEXT: flw fa5, 104(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 208(sp)
+; RV64-NEXT: flw fa5, 100(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 200(sp)
+; RV64-NEXT: flw fa5, 96(sp)
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 192(sp)
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 128(sp)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 3
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 152(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 2
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 144(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 136(sp)
+; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 7
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 184(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 6
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 176(sp)
+; RV64-NEXT: vslidedown.vi v10, v8, 5
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 168(sp)
+; RV64-NEXT: vslidedown.vi v8, v8, 4
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: sd a0, 160(sp)
+; RV64-NEXT: addi a0, sp, 128
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi sp, s0, -384
+; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 384
+; RV64-NEXT: ret
+ %a = call <16 x i64> @llvm.vp.llrint.v16i64.v16f32(<16 x float> %x, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.vp.llrint.v16i64.v16f32(<16 x float>, <16 x i1>, i32)
+
+; vp.llrint from <1 x double> to <1 x i64>.
+; NOTE(review): the expected code below never reads the mask (v0) or %evl (a0).
+; The single element is moved to a scalar FP register with vfmv.f.s and
+; converted as a scalar (a call to llrint on riscv32, fcvt.l.d on riscv64),
+; so the VP operation is scalarized instead of using a masked vector convert.
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x, <1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v1i64_v1f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v1i64_v1f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: vmv.s.x v8, a0
+; RV64-NEXT: ret
+ %a = call <1 x i64> @llvm.vp.llrint.v1i64.v1f64(<1 x double> %x, <1 x i1> %m, i32 %evl)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.vp.llrint.v1i64.v1f64(<1 x double>, <1 x i1>, i32)
+
+; vp.llrint from <2 x double> to <2 x i64>.
+; NOTE(review): the expected code below never reads the mask (v0) or %evl (a0).
+; Each element is extracted (vslidedown.vi + vfmv.f.s) and converted as a
+; scalar: on riscv32 via two calls to llrint, with the source and the partial
+; result spilled around the calls; on riscv64 via fcvt.l.d. The result vector
+; is rebuilt with vmv.v.x / vslide1down.vx.
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v2i64_v2f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v2i64_v2f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a1, fa5
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v8, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: ret
+ %a = call <2 x i64> @llvm.vp.llrint.v2i64.v2f64(<2 x double> %x, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.vp.llrint.v2i64.v2f64(<2 x double>, <2 x i1>, i32)
+
+; vp.llrint from <4 x double> to <4 x i64>.
+; NOTE(review): the expected code below never reads the mask (v0) or %evl (a0).
+; All four elements are extracted one at a time (vslidedown.vi + vfmv.f.s) and
+; converted as scalars: four calls to llrint on riscv32, with the source
+; vector and the partially built result spilled and reloaded around every
+; call; four fcvt.l.d on riscv64. The result is assembled with vslide1down.vx.
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v4i64_v4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v4i64_v4f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a1, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v12, v8, 2
+; RV64-NEXT: vfmv.f.s fa5, v12
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v8, v10, a0
+; RV64-NEXT: ret
+ %a = call <4 x i64> @llvm.vp.llrint.v4i64.v4f64(<4 x double> %x, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.vp.llrint.v4i64.v4f64(<4 x double>, <4 x i1>, i32)
+
+; vp.llrint from <8 x double> to <8 x i64>.
+; NOTE(review): the expected code below never reads the mask (v0) or %evl (a0).
+; The source vector is stored to an aligned stack slot; elements 4-7 are read
+; back with fld and elements 0-3 are extracted with vslidedown.vi/vfmv.f.s.
+; Every element is converted as a scalar (call to llrint on riscv32,
+; fcvt.l.d on riscv64), the results are written to a second stack buffer, and
+; that buffer is loaded back as the result vector.
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v8i64_v8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -272
+; RV32-NEXT: .cfi_def_cfa_offset 272
+; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 272
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: addi a0, sp, 256
+; RV32-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: addi a0, sp, 64
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: fld fa0, 120(sp)
+; RV32-NEXT: call llrint
+; RV32-NEXT: sw a1, 188(sp)
+; RV32-NEXT: sw a0, 184(sp)
+; RV32-NEXT: fld fa0, 112(sp)
+; RV32-NEXT: call llrint
+; RV32-NEXT: sw a1, 180(sp)
+; RV32-NEXT: sw a0, 176(sp)
+; RV32-NEXT: fld fa0, 104(sp)
+; RV32-NEXT: call llrint
+; RV32-NEXT: sw a1, 172(sp)
+; RV32-NEXT: sw a0, 168(sp)
+; RV32-NEXT: fld fa0, 96(sp)
+; RV32-NEXT: call llrint
+; RV32-NEXT: sw a1, 164(sp)
+; RV32-NEXT: sw a0, 160(sp)
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 256
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint
+; RV32-NEXT: sw a1, 132(sp)
+; RV32-NEXT: sw a0, 128(sp)
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: addi a0, sp, 256
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint
+; RV32-NEXT: sw a1, 140(sp)
+; RV32-NEXT: sw a0, 136(sp)
+; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 256
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint
+; RV32-NEXT: sw a1, 156(sp)
+; RV32-NEXT: sw a0, 152(sp)
+; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV32-NEXT: addi a0, sp, 256
+; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrint
+; RV32-NEXT: sw a1, 148(sp)
+; RV32-NEXT: sw a0, 144(sp)
+; RV32-NEXT: addi a0, sp, 128
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: addi sp, s0, -272
+; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 272
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v8i64_v8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -192
+; RV64-NEXT: .cfi_def_cfa_offset 192
+; RV64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 192
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -64
+; RV64-NEXT: mv a0, sp
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: fld fa5, 56(sp)
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 120(sp)
+; RV64-NEXT: fld fa5, 48(sp)
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 112(sp)
+; RV64-NEXT: fld fa5, 40(sp)
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 104(sp)
+; RV64-NEXT: fld fa5, 32(sp)
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 96(sp)
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 64(sp)
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 72(sp)
+; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 3
+; RV64-NEXT: vfmv.f.s fa5, v10
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 88(sp)
+; RV64-NEXT: vslidedown.vi v8, v8, 2
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: sd a0, 80(sp)
+; RV64-NEXT: addi a0, sp, 64
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi sp, s0, -192
+; RV64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 192
+; RV64-NEXT: ret
+ %a = call <8 x i64> @llvm.vp.llrint.v8i64.v8f64(<8 x double> %x, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.vp.llrint.v8i64.v8f64(<8 x double>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
new file mode 100644
index 00000000000000..08dd1c79f24c9b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
@@ -0,0 +1,233 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+f,+d \
+; RUN: -target-abi=ilp32d -verify-machineinstrs | FileCheck %s --check-prefix=RV32
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \
+; RUN: -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \
+; RUN: -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i64
+
+; vp.lrint on <1 x float>. iXLen is rewritten to i32 or i64 by the sed command
+; in the RUN lines. Expected: a single masked (v0.t) conversion with VL taken
+; from %evl in a0; same-width vfcvt.x.f.v for an i32 result, widening
+; vfwcvt.x.f.v for an i64 result.
+define <1 x iXLen> @lrint_v1f32(<1 x float> %x, <1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v1f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v1f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v1f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v9, v8, v0.t
+; RV64-i64-NEXT: vmv1r.v v8, v9
+; RV64-i64-NEXT: ret
+ %a = call <1 x iXLen> @llvm.vp.lrint.v1iXLen.v1f32(<1 x float> %x, <1 x i1> %m, i32 %evl)
+ ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.vp.lrint.v1iXLen.v1f32(<1 x float>, <1 x i1>, i32)
+
+; vp.lrint on <2 x float>. iXLen is rewritten to i32 or i64 by the sed command
+; in the RUN lines. Expected: a single masked (v0.t) conversion with VL taken
+; from %evl in a0; same-width vfcvt.x.f.v for an i32 result, widening
+; vfwcvt.x.f.v for an i64 result.
+define <2 x iXLen> @lrint_v2f32(<2 x float> %x, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v2f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v2f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v2f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v9, v8, v0.t
+; RV64-i64-NEXT: vmv1r.v v8, v9
+; RV64-i64-NEXT: ret
+ %a = call <2 x iXLen> @llvm.vp.lrint.v2iXLen.v2f32(<2 x float> %x, <2 x i1> %m, i32 %evl)
+ ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.vp.lrint.v2iXLen.v2f32(<2 x float>, <2 x i1>, i32)
+
+; vp.lrint on the non-power-of-two type <3 x float>. iXLen is rewritten to i32
+; or i64 by the sed command in the RUN lines. The expected code matches the
+; <4 x float> test (e32/m1): one masked (v0.t) conversion with VL taken from
+; %evl in a0, widening (vfwcvt.x.f.v) when the result is i64.
+define <3 x iXLen> @lrint_v3f32(<3 x float> %x, <3 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v3f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v3f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v3f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v10, v8, v0.t
+; RV64-i64-NEXT: vmv2r.v v8, v10
+; RV64-i64-NEXT: ret
+ %a = call <3 x iXLen> @llvm.vp.lrint.v3iXLen.v3f32(<3 x float> %x, <3 x i1> %m, i32 %evl)
+ ret <3 x iXLen> %a
+}
+declare <3 x iXLen> @llvm.vp.lrint.v3iXLen.v3f32(<3 x float>, <3 x i1>, i32)
+
+; vp.lrint on <4 x float>. iXLen is rewritten to i32 or i64 by the sed command
+; in the RUN lines. Expected: a single masked (v0.t) conversion with VL taken
+; from %evl in a0; same-width vfcvt.x.f.v for an i32 result, widening
+; vfwcvt.x.f.v for an i64 result.
+define <4 x iXLen> @lrint_v4f32(<4 x float> %x, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v4f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v4f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v10, v8, v0.t
+; RV64-i64-NEXT: vmv2r.v v8, v10
+; RV64-i64-NEXT: ret
+ %a = call <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f32(<4 x float> %x, <4 x i1> %m, i32 %evl)
+ ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f32(<4 x float>, <4 x i1>, i32)
+
+; vp.lrint on <8 x float>. iXLen is rewritten to i32 or i64 by the sed command
+; in the RUN lines. Expected: a single masked (v0.t) conversion with VL taken
+; from %evl in a0; same-width vfcvt.x.f.v for an i32 result, widening
+; vfwcvt.x.f.v for an i64 result.
+define <8 x iXLen> @lrint_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v8f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v8f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v12, v8, v0.t
+; RV64-i64-NEXT: vmv4r.v v8, v12
+; RV64-i64-NEXT: ret
+ %a = call <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f32(<8 x float> %x, <8 x i1> %m, i32 %evl)
+ ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f32(<8 x float>, <8 x i1>, i32)
+
+; vp.lrint on <16 x float>. iXLen is rewritten to i32 or i64 by the sed command
+; in the RUN lines.
+; The function name must not contain "iXLen": sed would rewrite it in the llc
+; input as well, the emitted label would no longer match the name in this
+; file, and update_llc_test_checks.py would generate no assertions for it.
+; NOTE(review): the assertions below were extrapolated from the neighbouring
+; tests (one masked conversion at e32/m4, widening to m8 for an i64 result);
+; re-run utils/update_llc_test_checks.py to confirm them.
+define <16 x iXLen> @lrint_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v16f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v16f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v16f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v16, v8, v0.t
+; RV64-i64-NEXT: vmv8r.v v8, v16
+; RV64-i64-NEXT: ret
+ %a = call <16 x iXLen> @llvm.vp.lrint.v16iXLen.v16f32(<16 x float> %x, <16 x i1> %m, i32 %evl)
+ ret <16 x iXLen> %a
+}
+declare <16 x iXLen> @llvm.vp.lrint.v16iXLen.v16f32(<16 x float>, <16 x i1>, i32)
+
+; vp.lrint on <1 x double>. iXLen is rewritten to i32 or i64 by the sed command
+; in the RUN lines. Expected: a single masked (v0.t) conversion with VL taken
+; from %evl in a0; narrowing vfncvt.x.f.w for an i32 result, same-width
+; vfcvt.x.f.v for an i64 result.
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x, <1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v1f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; RV32-NEXT: vfncvt.x.f.w v9, v8, v0.t
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v1f64:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i32-NEXT: vfncvt.x.f.w v9, v8, v0.t
+; RV64-i32-NEXT: vmv1r.v v8, v9
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v1f64:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV64-i64-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV64-i64-NEXT: ret
+ %a = call <1 x iXLen> @llvm.vp.lrint.v1iXLen.v1f64(<1 x double> %x, <1 x i1> %m, i32 %evl)
+ ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.vp.lrint.v1iXLen.v1f64(<1 x double>, <1 x i1>, i32)
+
+; vp.lrint on <2 x double>. iXLen is rewritten to i32 or i64 by the sed command
+; in the RUN lines. Expected: a single masked (v0.t) conversion with VL taken
+; from %evl in a0; narrowing vfncvt.x.f.w for an i32 result, same-width
+; vfcvt.x.f.v for an i64 result.
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v2f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; RV32-NEXT: vfncvt.x.f.w v9, v8, v0.t
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v2f64:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i32-NEXT: vfncvt.x.f.w v9, v8, v0.t
+; RV64-i32-NEXT: vmv1r.v v8, v9
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v2f64:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV64-i64-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV64-i64-NEXT: ret
+ %a = call <2 x iXLen> @llvm.vp.lrint.v2iXLen.v2f64(<2 x double> %x, <2 x i1> %m, i32 %evl)
+ ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.vp.lrint.v2iXLen.v2f64(<2 x double>, <2 x i1>, i32)
+
+; vp.lrint on <4 x double>. iXLen is rewritten to i32 or i64 by the sed command
+; in the RUN lines. Expected: a single masked (v0.t) conversion with VL taken
+; from %evl in a0; narrowing vfncvt.x.f.w for an i32 result, same-width
+; vfcvt.x.f.v for an i64 result.
+define <4 x iXLen> @lrint_v4f64(<4 x double> %x, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT: vfncvt.x.f.w v10, v8, v0.t
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v4f64:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV64-i32-NEXT: vfncvt.x.f.w v10, v8, v0.t
+; RV64-i32-NEXT: vmv.v.v v8, v10
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v4f64:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; RV64-i64-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV64-i64-NEXT: ret
+ %a = call <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f64(<4 x double> %x, <4 x i1> %m, i32 %evl)
+ ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f64(<4 x double>, <4 x i1>, i32)
+
+; vp.lrint on <8 x double>. iXLen is rewritten to i32 or i64 by the sed command
+; in the RUN lines. Expected: a single masked (v0.t) conversion with VL taken
+; from %evl in a0; narrowing vfncvt.x.f.w for an i32 result, same-width
+; vfcvt.x.f.v for an i64 result.
+define <8 x iXLen> @lrint_v8f64(<8 x double> %x, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT: vfncvt.x.f.w v12, v8, v0.t
+; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v8f64:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV64-i32-NEXT: vfncvt.x.f.w v12, v8, v0.t
+; RV64-i32-NEXT: vmv.v.v v8, v12
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v8f64:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; RV64-i64-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV64-i64-NEXT: ret
+ %a = call <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f64(<8 x double> %x, <8 x i1> %m, i32 %evl)
+ ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f64(<8 x double>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll
new file mode 100644
index 00000000000000..47a3e20925068d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+
+; vp.llrint from <vscale x 1 x float> to <vscale x 1 x i64> (widening convert).
+; NOTE(review): the expected code uses "vsetvli a0, zero, ..." (AVL register
+; is zero, a0 is overwritten) and a convert without v0.t, so neither %m nor
+; %evl influences the result. Looks like the VP operands are dropped for
+; scalable vectors - confirm against the lowering.
+define <vscale x 1 x i64> @llrint_nxv1i64_nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv1i64_nxv1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %a = call <vscale x 1 x i64> @llvm.vp.llrint.nxv1i64.nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %a
+}
+declare <vscale x 1 x i64> @llvm.vp.llrint.nxv1i64.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
+
+; vp.llrint from <vscale x 2 x float> to <vscale x 2 x i64> (widening convert).
+; NOTE(review): the expected code uses "vsetvli a0, zero, ..." (AVL register
+; is zero, a0 is overwritten) and a convert without v0.t, so neither %m nor
+; %evl influences the result. Looks like the VP operands are dropped for
+; scalable vectors - confirm against the lowering.
+define <vscale x 2 x i64> @llrint_nxv2i64_nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv2i64_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v10, v8
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i64> @llvm.vp.llrint.nxv2i64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %a
+}
+declare <vscale x 2 x i64> @llvm.vp.llrint.nxv2i64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+
+; vp.llrint from <vscale x 4 x float> to <vscale x 4 x i64> (widening convert).
+; NOTE(review): the expected code uses "vsetvli a0, zero, ..." (AVL register
+; is zero, a0 is overwritten) and a convert without v0.t, so neither %m nor
+; %evl influences the result. Looks like the VP operands are dropped for
+; scalable vectors - confirm against the lowering.
+define <vscale x 4 x i64> @llrint_nxv4i64_nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv4i64_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v12, v8
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+ %a = call <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %a
+}
+declare <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
+
+; vp.llrint from <vscale x 8 x float> to <vscale x 8 x i64> (widening convert).
+; NOTE(review): the expected code uses "vsetvli a0, zero, ..." (AVL register
+; is zero, a0 is overwritten) and a convert without v0.t, so neither %m nor
+; %evl influences the result. Looks like the VP operands are dropped for
+; scalable vectors - confirm against the lowering.
+define <vscale x 8 x i64> @llrint_nxv8i64_nxv8f32(<vscale x 8 x float> %x, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv8i64_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v16, v8
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: ret
+ %a = call <vscale x 8 x i64> @llvm.vp.llrint.nxv8i64.nxv8f32(<vscale x 8 x float> %x, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %a
+}
+declare <vscale x 8 x i64> @llvm.vp.llrint.nxv8i64.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
+
+; vp.llrint from <vscale x 16 x float> to <vscale x 16 x i64>. The expected
+; code performs two widening converts at e32/m4, one on the register group
+; starting at v8 and one on the group starting at v12.
+; NOTE(review): the expected code uses "vsetvli a0, zero, ..." (AVL register
+; is zero, a0 is overwritten) and converts without v0.t, so neither %m nor
+; %evl influences the result. Looks like the VP operands are dropped for
+; scalable vectors - confirm against the lowering.
+define <vscale x 16 x i64> @llrint_nxv16i64_nxv16f32(<vscale x 16 x float> %x, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv16i64_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v24, v8
+; CHECK-NEXT: vfwcvt.x.f.v v16, v12
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+ %a = call <vscale x 16 x i64> @llvm.vp.llrint.nxv16i64.nxv16f32(<vscale x 16 x float> %x, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i64> %a
+}
+declare <vscale x 16 x i64> @llvm.vp.llrint.nxv16i64.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
+
+; vp.llrint from <vscale x 1 x double> to <vscale x 1 x i64> (same-width
+; convert).
+; NOTE(review): the expected code uses "vsetvli a0, zero, ..." (AVL register
+; is zero, a0 is overwritten) and a convert without v0.t, so neither %m nor
+; %evl influences the result. Looks like the VP operands are dropped for
+; scalable vectors - confirm against the lowering.
+define <vscale x 1 x i64> @llrint_nxv1i64_nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv1i64_nxv1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v8
+; CHECK-NEXT: ret
+ %a = call <vscale x 1 x i64> @llvm.vp.llrint.nxv1i64.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %a
+}
+declare <vscale x 1 x i64> @llvm.vp.llrint.nxv1i64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
+
+; vp.llrint from <vscale x 2 x double> to <vscale x 2 x i64> (same-width
+; convert).
+; NOTE(review): the expected code uses "vsetvli a0, zero, ..." (AVL register
+; is zero, a0 is overwritten) and a convert without v0.t, so neither %m nor
+; %evl influences the result. Looks like the VP operands are dropped for
+; scalable vectors - confirm against the lowering.
+define <vscale x 2 x i64> @llrint_nxv2i64_nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv2i64_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v8
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i64> @llvm.vp.llrint.nxv2i64.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %a
+}
+declare <vscale x 2 x i64> @llvm.vp.llrint.nxv2i64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
+
+; vp.llrint from <vscale x 4 x double> to <vscale x 4 x i64> (same-width
+; convert).
+; NOTE(review): the expected code uses "vsetvli a0, zero, ..." (AVL register
+; is zero, a0 is overwritten) and a convert without v0.t, so neither %m nor
+; %evl influences the result. Looks like the VP operands are dropped for
+; scalable vectors - confirm against the lowering.
+define <vscale x 4 x i64> @llrint_nxv4i64_nxv4f64(<vscale x 4 x double> %x, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv4i64_nxv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v8
+; CHECK-NEXT: ret
+ %a = call <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f64(<vscale x 4 x double> %x, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %a
+}
+declare <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
+
+; vp.llrint from <vscale x 8 x double> to <vscale x 8 x i64> (same-width
+; convert).
+; NOTE(review): the expected code uses "vsetvli a0, zero, ..." (AVL register
+; is zero, a0 is overwritten) and a convert without v0.t, so neither %m nor
+; %evl influences the result. Looks like the VP operands are dropped for
+; scalable vectors - confirm against the lowering.
+define <vscale x 8 x i64> @llrint_nxv8i64_nxv8f64(<vscale x 8 x double> %x, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv8i64_nxv8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v8
+; CHECK-NEXT: ret
+ %a = call <vscale x 8 x i64> @llvm.vp.llrint.nxv8i64.nxv8f64(<vscale x 8 x double> %x, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %a
+}
+declare <vscale x 8 x i64> @llvm.vp.llrint.nxv8i64.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
new file mode 100644
index 00000000000000..235f4f792ebab8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
@@ -0,0 +1,209 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+f,+d \
+; RUN: -target-abi=ilp32d -verify-machineinstrs | FileCheck %s --check-prefix=RV32
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \
+; RUN: -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \
+; RUN: -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i64
+
+define <vscale x 1 x iXLen> @lrint_nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; vp.lrint, nxv1f32 -> nxv1iXLen: the iXLen=i32 runs expect vfcvt.x.f.v; the iXLen=i64 run expects widening vfwcvt.x.f.v followed by a register move into v8
+; RV32-LABEL: lrint_nxv1f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_nxv1f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_nxv1f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v9, v8
+; RV64-i64-NEXT: vmv1r.v v8, v9
+; RV64-i64-NEXT: ret
+ %a = call <vscale x 1 x iXLen> @llvm.vp.lrint.nxv1iXLen.nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x iXLen> %a ; NOTE(review): all three expected sequences request VLMAX (vsetvli with rs1=zero) and have no v0.t operand, so %m and %evl look unused - confirm this is intended
+}
+declare <vscale x 1 x iXLen> @llvm.vp.lrint.nxv1iXLen.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
+
+define <vscale x 2 x iXLen> @lrint_nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; vp.lrint, nxv2f32 -> nxv2iXLen: the iXLen=i32 runs expect vfcvt.x.f.v; the iXLen=i64 run expects widening vfwcvt.x.f.v followed by a register move into v8
+; RV32-LABEL: lrint_nxv2f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_nxv2f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_nxv2f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v10, v8
+; RV64-i64-NEXT: vmv2r.v v8, v10
+; RV64-i64-NEXT: ret
+ %a = call <vscale x 2 x iXLen> @llvm.vp.lrint.nxv2iXLen.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x iXLen> %a ; NOTE(review): all three expected sequences request VLMAX (vsetvli with rs1=zero) and have no v0.t operand, so %m and %evl look unused - confirm this is intended
+}
+declare <vscale x 2 x iXLen> @llvm.vp.lrint.nxv2iXLen.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+
+define <vscale x 4 x iXLen> @lrint_nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; vp.lrint, nxv4f32 -> nxv4iXLen: the iXLen=i32 runs expect vfcvt.x.f.v; the iXLen=i64 run expects widening vfwcvt.x.f.v followed by a register move into v8
+; RV32-LABEL: lrint_nxv4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_nxv4f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_nxv4f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v12, v8
+; RV64-i64-NEXT: vmv4r.v v8, v12
+; RV64-i64-NEXT: ret
+ %a = call <vscale x 4 x iXLen> @llvm.vp.lrint.nxv4iXLen.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x iXLen> %a ; NOTE(review): all three expected sequences request VLMAX (vsetvli with rs1=zero) and have no v0.t operand, so %m and %evl look unused - confirm this is intended
+}
+declare <vscale x 4 x iXLen> @llvm.vp.lrint.nxv4iXLen.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
+
+define <vscale x 8 x iXLen> @lrint_nxv8f32(<vscale x 8 x float> %x, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; vp.lrint, nxv8f32 -> nxv8iXLen: the iXLen=i32 runs expect vfcvt.x.f.v; the iXLen=i64 run expects widening vfwcvt.x.f.v followed by a register move into v8
+; RV32-LABEL: lrint_nxv8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_nxv8f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_nxv8f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v16, v8
+; RV64-i64-NEXT: vmv8r.v v8, v16
+; RV64-i64-NEXT: ret
+ %a = call <vscale x 8 x iXLen> @llvm.vp.lrint.nxv8iXLen.nxv8f32(<vscale x 8 x float> %x, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x iXLen> %a ; NOTE(review): all three expected sequences request VLMAX (vsetvli with rs1=zero) and have no v0.t operand, so %m and %evl look unused - confirm this is intended
+}
+declare <vscale x 8 x iXLen> @llvm.vp.lrint.nxv8iXLen.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
+
+define <vscale x 16 x iXLen> @lrint_nxv16f32(<vscale x 16 x float> %x, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; vp.lrint, nxv16f32 -> nxv16iXLen. TODO: no assertions exist for this function yet; rerun utils/update_llc_test_checks.py to generate them
+ %a = call <vscale x 16 x iXLen> @llvm.vp.lrint.nxv16iXLen.nxv16f32(<vscale x 16 x float> %x, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x iXLen> %a ; the function name deliberately omits iXLen: the sed in the RUN lines rewrites iXLen before llc runs, so a symbol containing it no longer matches the emitted label (presumably why no assertions were generated before)
+}
+declare <vscale x 16 x iXLen> @llvm.vp.lrint.nxv16iXLen.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
+
+define <vscale x 1 x iXLen> @lrint_nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; vp.lrint, nxv1f64 -> nxv1iXLen: the iXLen=i32 runs expect narrowing vfncvt.x.f.w followed by a move into v8; the iXLen=i64 run expects same-width vfcvt.x.f.v
+; RV32-LABEL: lrint_nxv1f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV32-NEXT: vfncvt.x.f.w v9, v8
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_nxv1f64:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-i32-NEXT: vfncvt.x.f.w v9, v8
+; RV64-i32-NEXT: vmv1r.v v8, v9
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_nxv1f64:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-i64-NEXT: vfcvt.x.f.v v8, v8
+; RV64-i64-NEXT: ret
+ %a = call <vscale x 1 x iXLen> @llvm.vp.lrint.nxv1iXLen.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x iXLen> %a ; NOTE(review): all three expected sequences request VLMAX (vsetvli with rs1=zero) and have no v0.t operand, so %m and %evl look unused - confirm this is intended
+}
+declare <vscale x 1 x iXLen> @llvm.vp.lrint.nxv1iXLen.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
+
+define <vscale x 2 x iXLen> @lrint_nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; vp.lrint, nxv2f64 -> nxv2iXLen: the iXLen=i32 runs expect narrowing vfncvt.x.f.w followed by a move into v8; the iXLen=i64 run expects same-width vfcvt.x.f.v
+; RV32-LABEL: lrint_nxv2f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; RV32-NEXT: vfncvt.x.f.w v10, v8
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_nxv2f64:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; RV64-i32-NEXT: vfncvt.x.f.w v10, v8
+; RV64-i32-NEXT: vmv.v.v v8, v10
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_nxv2f64:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-i64-NEXT: vfcvt.x.f.v v8, v8
+; RV64-i64-NEXT: ret
+ %a = call <vscale x 2 x iXLen> @llvm.vp.lrint.nxv2iXLen.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x iXLen> %a ; NOTE(review): all three expected sequences request VLMAX (vsetvli with rs1=zero) and have no v0.t operand, so %m and %evl look unused - confirm this is intended
+}
+declare <vscale x 2 x iXLen> @llvm.vp.lrint.nxv2iXLen.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
+
+define <vscale x 4 x iXLen> @lrint_nxv4f64(<vscale x 4 x double> %x, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; vp.lrint, nxv4f64 -> nxv4iXLen: the iXLen=i32 runs expect narrowing vfncvt.x.f.w followed by a move into v8; the iXLen=i64 run expects same-width vfcvt.x.f.v
+; RV32-LABEL: lrint_nxv4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT: vfncvt.x.f.w v12, v8
+; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_nxv4f64:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-i32-NEXT: vfncvt.x.f.w v12, v8
+; RV64-i32-NEXT: vmv.v.v v8, v12
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_nxv4f64:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-i64-NEXT: vfcvt.x.f.v v8, v8
+; RV64-i64-NEXT: ret
+ %a = call <vscale x 4 x iXLen> @llvm.vp.lrint.nxv4iXLen.nxv4f64(<vscale x 4 x double> %x, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x iXLen> %a ; NOTE(review): all three expected sequences request VLMAX (vsetvli with rs1=zero) and have no v0.t operand, so %m and %evl look unused - confirm this is intended
+}
+declare <vscale x 4 x iXLen> @llvm.vp.lrint.nxv4iXLen.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
+
+define <vscale x 8 x iXLen> @lrint_nxv8f64(<vscale x 8 x double> %x, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; vp.lrint, nxv8f64 -> nxv8iXLen: the iXLen=i32 runs expect narrowing vfncvt.x.f.w followed by a move into v8; the iXLen=i64 run expects same-width vfcvt.x.f.v
+; RV32-LABEL: lrint_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; RV32-NEXT: vfncvt.x.f.w v16, v8
+; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_nxv8f64:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; RV64-i32-NEXT: vfncvt.x.f.w v16, v8
+; RV64-i32-NEXT: vmv.v.v v8, v16
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_nxv8f64:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; RV64-i64-NEXT: vfcvt.x.f.v v8, v8
+; RV64-i64-NEXT: ret
+ %a = call <vscale x 8 x iXLen> @llvm.vp.lrint.nxv8iXLen.nxv8f64(<vscale x 8 x double> %x, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x iXLen> %a ; NOTE(review): all three expected sequences request VLMAX (vsetvli with rs1=zero) and have no v0.t operand, so %m and %evl look unused - confirm this is intended
+}
+declare <vscale x 8 x iXLen> @llvm.vp.lrint.nxv8iXLen.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
More information about the llvm-commits
mailing list