[llvm] [VP][RISCV] Introduce vp.lrint/llrint and RISC-V support. (PR #82627)

Thu Feb 22 19:22:16 PST 2024

https://github.com/yetingk updated https://github.com/llvm/llvm-project/pull/82627

>From 033e24a43b45de6c052d8922680c9539e0757b35 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Thu, 22 Feb 2024 14:07:53 +0800
Subject: [PATCH] [VP][RISCV] Introduce vp.lrint/llrint and RISC-V support.

RISC-V implements vector lrint/llrint by vfcvt.x.f.v.
---
 llvm/docs/LangRef.rst                         |  98 ++++++++
 llvm/include/llvm/IR/Intrinsics.td            |   8 +
 llvm/include/llvm/IR/VPIntrinsics.def         |  12 +
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  14 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  10 +-
 .../RISCV/rvv/fixed-vectors-llrint-vp.ll      | 187 ++++++++++++++
 .../RISCV/rvv/fixed-vectors-lrint-vp.ll       | 233 ++++++++++++++++++
 llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll      | 125 ++++++++++
 llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll       | 209 ++++++++++++++++
 llvm/unittests/IR/VPIntrinsicTest.cpp         |   4 +
 10 files changed, 898 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index fd2e3aacd0169c..bea1e71d94a9a0 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15990,6 +15990,8 @@ functions would, but without setting errno. If the rounded value is
 too large to be stored in the result type, the return value is a
 non-deterministic value (equivalent to `freeze poison`).
 
+.. _int_lrint:
+
 '``llvm.lrint.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -16035,6 +16037,8 @@ would, but without setting errno. If the rounded value is too large to
 be stored in the result type, the return value is a non-deterministic
 value (equivalent to `freeze poison`).
 
+.. _int_llrint:
+
 '``llvm.llrint.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -23344,6 +23348,100 @@ Examples:
       %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
       %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison
 
+.. _int_vp_lrint:
+
+'``llvm.vp.lrint.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32> @llvm.vp.lrint.v16i32.v16f32(<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32> @llvm.vp.lrint.nxv4i32.nxv4f32(<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64> @llvm.vp.lrint.v256i64.v256f64(<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated lrint of a vector of floating-point values.
+
+
+Arguments:
+""""""""""
+
+The result is an integer vector and the first operand is a vector of :ref:`floating-point <t_floating>`
+type with the same number of elements as the result vector type. The second
+operand is the vector mask and has the same number of elements as the result
+vector type. The third operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.lrint``' intrinsic performs lrint (:ref:`lrint <int_lrint>`) of
+the first vector operand on each enabled lane. The result on disabled lanes is a
+:ref:`poison value <poisonvalues>`.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.lrint.v4i32.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x i32> @llvm.lrint.v4f32(<4 x float> %a)
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+.. _int_vp_llrint:
+
+'``llvm.vp.llrint.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32> @llvm.vp.llrint.v16i32.v16f32(<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32> @llvm.vp.llrint.nxv4i32.nxv4f32(<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64> @llvm.vp.llrint.v256i64.v256f64(<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated llrint of a vector of floating-point values.
+
+
+Arguments:
+""""""""""
+The result is an integer vector and the first operand is a vector of :ref:`floating-point <t_floating>`
+type with the same number of elements as the result vector type. The second
+operand is the vector mask and has the same number of elements as the result
+vector type. The third operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.llrint``' intrinsic performs lrint (:ref:`llrint <int_llrint>`) of
+the first vector operand on each enabled lane. The result on disabled lanes is a
+:ref:`poison value <poisonvalues>`.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.llrint.v4i32.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x i32> @llvm.llrint.v4f32(<4 x float> %a)
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+
 .. _int_vp_bitreverse:
 
 '``llvm.vp.bitreverse.*``' Intrinsics
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 4becdd71cd440d..ab91abacac6a13 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2036,6 +2036,14 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
                              [ LLVMMatchType<0>,
                                LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                llvm_i32_ty]>;
+  def int_vp_lrint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                             [ llvm_anyvector_ty,
+                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                               llvm_i32_ty]>;
+  def int_vp_llrint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                             [ llvm_anyvector_ty,
+                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                               llvm_i32_ty]>;
 
   // Casts
   def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 3b32b60609f536..c320f768ce6d82 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -437,6 +437,18 @@ VP_PROPERTY_FUNCTIONAL_INTRINSIC(nearbyint)
 VP_PROPERTY_FUNCTIONAL_SDOPC(FNEARBYINT)
 END_REGISTER_VP(vp_nearbyint, VP_FNEARBYINT)
 
+// llvm.vp.lrint(x,mask,vlen)
+BEGIN_REGISTER_VP(vp_lrint, 1, 2, VP_LRINT, 0)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(lrint)
+VP_PROPERTY_FUNCTIONAL_SDOPC(LRINT)
+END_REGISTER_VP(vp_lrint, VP_LRINT)
+
+// llvm.vp.llrint(x,mask,vlen)
+BEGIN_REGISTER_VP(vp_llrint, 1, 2, VP_LLRINT, 0)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(llrint)
+VP_PROPERTY_FUNCTIONAL_SDOPC(LLRINT)
+END_REGISTER_VP(vp_llrint, VP_LLRINT)
+
 ///// } Floating-Point Arithmetic
 
 ///// Type Casts {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7fc252600534f0..87a5ab9cfecb0d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1102,7 +1102,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FRINT:
   case ISD::VP_FRINT:
   case ISD::LRINT:
+  case ISD::VP_LRINT:
   case ISD::LLRINT:
+  case ISD::VP_LLRINT:
   case ISD::FROUND:
   case ISD::VP_FROUND:
   case ISD::FROUNDEVEN:
@@ -4263,6 +4265,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
 
   case ISD::LRINT:
   case ISD::LLRINT:
+  case ISD::VP_LRINT:
+  case ISD::VP_LLRINT:
     Res = WidenVecRes_XRINT(N);
     break;
 
@@ -4869,7 +4873,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_XRINT(SDNode *N) {
   if (WidenNumElts != SrcVT.getVectorElementCount())
     return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
 
-  return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+  if (N->getNumOperands() == 1)
+    return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+
+  assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+  assert(N->isVPOpcode() && "Expected VP opcode");
+
+  SDValue Mask =
+      GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
+  return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, Mask, N->getOperand(2));
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4f809ccf7b3019..5fea8aaac0d775 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -699,7 +699,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
         ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
         ISD::VP_FRINT,       ISD::VP_FNEARBYINT,  ISD::VP_IS_FPCLASS,
-        ISD::VP_FMINIMUM,    ISD::VP_FMAXIMUM,    ISD::EXPERIMENTAL_VP_REVERSE,
+        ISD::VP_FMINIMUM,    ISD::VP_FMAXIMUM,    ISD::VP_LRINT,
+        ISD::VP_LLRINT,      ISD::EXPERIMENTAL_VP_REVERSE,
         ISD::EXPERIMENTAL_VP_SPLICE};
 
     static const unsigned IntegerVecReduceOps[] = {
@@ -5807,6 +5808,11 @@ static unsigned getRISCVVLOp(SDValue Op) {
   case ISD::FMAXNUM:
   case ISD::VP_FMAXNUM:
     return RISCVISD::VFMAX_VL;
+  case ISD::LRINT:
+  case ISD::VP_LRINT:
+  case ISD::LLRINT:
+  case ISD::VP_LLRINT:
+    return RISCVISD::VFCVT_X_F_VL;
   }
   // clang-format on
 #undef OP_CASE
@@ -6793,6 +6799,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::VP_UDIV:
   case ISD::VP_SREM:
   case ISD::VP_UREM:
+  case ISD::VP_LRINT:
+  case ISD::VP_LLRINT:
     return lowerVPOp(Op, DAG);
   case ISD::VP_AND:
   case ISD::VP_OR:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
new file mode 100644
index 00000000000000..8282b8884aed65
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
+
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x, <1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v1i64_v1f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV32-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v1i64_v1f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    ret
+  %a = call <1 x i64> @llvm.vp.llrint.v1i64.v1f32(<1 x float> %x, <1 x i1> %m, i32 %evl)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.vp.llrint.v1i64.v1f32(<1 x float>, <1 x i1>, i32)
+
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v2i64_v2f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV32-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v2i64_v2f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    ret
+  %a = call <2 x i64> @llvm.vp.llrint.v2i64.v2f32(<2 x float> %x, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.vp.llrint.v2i64.v2f32(<2 x float>, <2 x i1>, i32)
+
+define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x, <3 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v3i64_v3f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vfwcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    vmv2r.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v3i64_v3f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT:    vfwcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    vmv2r.v v8, v10
+; RV64-NEXT:    ret
+  %a = call <3 x i64> @llvm.vp.llrint.v3i64.v3f32(<3 x float> %x, <3 x i1> %m, i32 %evl)
+  ret <3 x i64> %a
+}
+declare <3 x i64> @llvm.vp.llrint.v3i64.v3f32(<3 x float>, <3 x i1>, i32)
+
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v4i64_v4f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vfwcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    vmv2r.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v4i64_v4f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT:    vfwcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    vmv2r.v v8, v10
+; RV64-NEXT:    ret
+  %a = call <4 x i64> @llvm.vp.llrint.v4i64.v4f32(<4 x float> %x, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.vp.llrint.v4i64.v4f32(<4 x float>, <4 x i1>, i32)
+
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v8i64_v8f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT:    vfwcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    vmv4r.v v8, v12
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v8i64_v8f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV64-NEXT:    vfwcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    vmv4r.v v8, v12
+; RV64-NEXT:    ret
+  %a = call <8 x i64> @llvm.vp.llrint.v8i64.v8f32(<8 x float> %x, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.vp.llrint.v8i64.v8f32(<8 x float>, <8 x i1>, i32)
+
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v16i64_v16f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; RV32-NEXT:    vfwcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v16i64_v16f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; RV64-NEXT:    vfwcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %a = call <16 x i64> @llvm.vp.llrint.v16i64.v16f32(<16 x float> %x, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.vp.llrint.v16i64.v16f32(<16 x float>, <16 x i1>, i32)
+
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x, <1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v1i64_v1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v1i64_v1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-NEXT:    ret
+  %a = call <1 x i64> @llvm.vp.llrint.v1i64.v1f64(<1 x double> %x, <1 x i1> %m, i32 %evl)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.vp.llrint.v1i64.v1f64(<1 x double>, <1 x i1>, i32)
+
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v2i64_v2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v2i64_v2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-NEXT:    ret
+  %a = call <2 x i64> @llvm.vp.llrint.v2i64.v2f64(<2 x double> %x, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.vp.llrint.v2i64.v2f64(<2 x double>, <2 x i1>, i32)
+
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v4i64_v4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v4i64_v4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-NEXT:    ret
+  %a = call <4 x i64> @llvm.vp.llrint.v4i64.v4f64(<4 x double> %x, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.vp.llrint.v4i64.v4f64(<4 x double>, <4 x i1>, i32)
+
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v8i64_v8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v8i64_v8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-NEXT:    ret
+  %a = call <8 x i64> @llvm.vp.llrint.v8i64.v8f64(<8 x double> %x, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.vp.llrint.v8i64.v8f64(<8 x double>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
new file mode 100644
index 00000000000000..08dd1c79f24c9b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
@@ -0,0 +1,233 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+f,+d \
+; RUN:     -target-abi=ilp32d -verify-machineinstrs | FileCheck %s --check-prefix=RV32
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \
+; RUN:     -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \
+; RUN:     -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i64
+
+define <1 x iXLen> @lrint_v1f32(<1 x float> %x, <1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v1f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_v1f32:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_v1f32:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i64-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
+; RV64-i64-NEXT:    vmv1r.v v8, v9
+; RV64-i64-NEXT:    ret
+  %a = call <1 x iXLen> @llvm.vp.lrint.v1iXLen.v1f32(<1 x float> %x, <1 x i1> %m, i32 %evl)
+  ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.vp.lrint.v1iXLen.v1f32(<1 x float>, <1 x i1>, i32)
+
+define <2 x iXLen> @lrint_v2f32(<2 x float> %x, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v2f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_v2f32:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_v2f32:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i64-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
+; RV64-i64-NEXT:    vmv1r.v v8, v9
+; RV64-i64-NEXT:    ret
+  %a = call <2 x iXLen> @llvm.vp.lrint.v2iXLen.v2f32(<2 x float> %x, <2 x i1> %m, i32 %evl)
+  ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.vp.lrint.v2iXLen.v2f32(<2 x float>, <2 x i1>, i32)
+
+define <3 x iXLen> @lrint_v3f32(<3 x float> %x, <3 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v3f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_v3f32:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-i32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_v3f32:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-i64-NEXT:    vfwcvt.x.f.v v10, v8, v0.t
+; RV64-i64-NEXT:    vmv2r.v v8, v10
+; RV64-i64-NEXT:    ret
+  %a = call <3 x iXLen> @llvm.vp.lrint.v3iXLen.v3f32(<3 x float> %x, <3 x i1> %m, i32 %evl)
+  ret <3 x iXLen> %a
+}
+declare <3 x iXLen> @llvm.vp.lrint.v3iXLen.v3f32(<3 x float>, <3 x i1>, i32)
+
+define <4 x iXLen> @lrint_v4f32(<4 x float> %x, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v4f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_v4f32:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-i32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_v4f32:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-i64-NEXT:    vfwcvt.x.f.v v10, v8, v0.t
+; RV64-i64-NEXT:    vmv2r.v v8, v10
+; RV64-i64-NEXT:    ret
+  %a = call <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f32(<4 x float> %x, <4 x i1> %m, i32 %evl)
+  ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f32(<4 x float>, <4 x i1>, i32)
+
+define <8 x iXLen> @lrint_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v8f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_v8f32:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV64-i32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_v8f32:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV64-i64-NEXT:    vfwcvt.x.f.v v12, v8, v0.t
+; RV64-i64-NEXT:    vmv4r.v v8, v12
+; RV64-i64-NEXT:    ret
+  %a = call <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f32(<8 x float> %x, <8 x i1> %m, i32 %evl)
+  ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f32(<8 x float>, <8 x i1>, i32)
+
+define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %evl) {
+  %a = call <16 x iXLen> @llvm.vp.lrint.v16iXLen.v16f32(<16 x float> %x, <16 x i1> %m, i32 %evl)
+  ret <16 x iXLen> %a
+}
+declare <16 x iXLen> @llvm.vp.lrint.v16iXLen.v16f32(<16 x float>, <16 x i1>, i32)
+
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x, <1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_v1f64:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
+; RV64-i32-NEXT:    vmv1r.v v8, v9
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_v1f64:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-i64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i64-NEXT:    ret
+  %a = call <1 x iXLen> @llvm.vp.lrint.v1iXLen.v1f64(<1 x double> %x, <1 x i1> %m, i32 %evl)
+  ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.vp.lrint.v1iXLen.v1f64(<1 x double>, <1 x i1>, i32)
+
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_v2f64:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
+; RV64-i32-NEXT:    vmv1r.v v8, v9
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_v2f64:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-i64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i64-NEXT:    ret
+  %a = call <2 x iXLen> @llvm.vp.lrint.v2iXLen.v2f64(<2 x double> %x, <2 x i1> %m, i32 %evl)
+  ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.vp.lrint.v2iXLen.v2f64(<2 x double>, <2 x i1>, i32)
+
+define <4 x iXLen> @lrint_v4f64(<4 x double> %x, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vfncvt.x.f.w v10, v8, v0.t
+; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_v4f64:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-i32-NEXT:    vfncvt.x.f.w v10, v8, v0.t
+; RV64-i32-NEXT:    vmv.v.v v8, v10
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_v4f64:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-i64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i64-NEXT:    ret
+  %a = call <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f64(<4 x double> %x, <4 x i1> %m, i32 %evl)
+  ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f64(<4 x double>, <4 x i1>, i32)
+
+define <8 x iXLen> @lrint_v8f64(<8 x double> %x, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT:    vfncvt.x.f.w v12, v8, v0.t
+; RV32-NEXT:    vmv.v.v v8, v12
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_v8f64:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV64-i32-NEXT:    vfncvt.x.f.w v12, v8, v0.t
+; RV64-i32-NEXT:    vmv.v.v v8, v12
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_v8f64:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV64-i64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i64-NEXT:    ret
+  %a = call <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f64(<8 x double> %x, <8 x i1> %m, i32 %evl)
+  ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f64(<8 x double>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll
new file mode 100644
index 00000000000000..6d8763d34ec1b0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x i64> @llrint_nxv1i64_nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv1i64_nxv1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %a = call <vscale x 1 x i64> @llvm.vp.llrint.nxv1i64.nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %a
+}
+declare <vscale x 1 x i64> @llvm.vp.llrint.nxv1i64.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
+
+define <vscale x 2 x i64> @llrint_nxv2i64_nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv2i64_nxv2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vfwcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i64> @llvm.vp.llrint.nxv2i64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %a
+}
+declare <vscale x 2 x i64> @llvm.vp.llrint.nxv2i64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+
+define <vscale x 4 x i64> @llrint_nxv4i64_nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv4i64_nxv4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vfwcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %a = call <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %a
+}
+declare <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
+
+define <vscale x 8 x i64> @llrint_nxv8i64_nxv8f32(<vscale x 8 x float> %x, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv8i64_nxv8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vfwcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %a = call <vscale x 8 x i64> @llvm.vp.llrint.nxv8i64.nxv8f32(<vscale x 8 x float> %x, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %a
+}
+declare <vscale x 8 x i64> @llvm.vp.llrint.nxv8i64.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
+
+define <vscale x 16 x i64> @llrint_nxv16i64_nxv16f32(<vscale x 16 x float> %x, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv16i64_nxv16f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a2, a1, 3
+; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT:    vfwcvt.x.f.v v16, v12, v0.t
+; CHECK-NEXT:    bltu a0, a1, .LBB4_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB4_2:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vfwcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vmv8r.v v8, v24
+; CHECK-NEXT:    ret
+  %a = call <vscale x 16 x i64> @llvm.vp.llrint.nxv16i64.nxv16f32(<vscale x 16 x float> %x, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i64> %a
+}
+declare <vscale x 16 x i64> @llvm.vp.llrint.nxv16i64.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
+
+define <vscale x 1 x i64> @llrint_nxv1i64_nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv1i64_nxv1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %a = call <vscale x 1 x i64> @llvm.vp.llrint.nxv1i64.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %a
+}
+declare <vscale x 1 x i64> @llvm.vp.llrint.nxv1i64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
+
+define <vscale x 2 x i64> @llrint_nxv2i64_nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv2i64_nxv2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i64> @llvm.vp.llrint.nxv2i64.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %a
+}
+declare <vscale x 2 x i64> @llvm.vp.llrint.nxv2i64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
+
+define <vscale x 4 x i64> @llrint_nxv4i64_nxv4f64(<vscale x 4 x double> %x, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv4i64_nxv4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %a = call <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f64(<vscale x 4 x double> %x, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %a
+}
+declare <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
+
+define <vscale x 8 x i64> @llrint_nxv8i64_nxv8f64(<vscale x 8 x double> %x, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: llrint_nxv8i64_nxv8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %a = call <vscale x 8 x i64> @llvm.vp.llrint.nxv8i64.nxv8f64(<vscale x 8 x double> %x, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %a
+}
+declare <vscale x 8 x i64> @llvm.vp.llrint.nxv8i64.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
new file mode 100644
index 00000000000000..8a826fb3ac1eaf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
@@ -0,0 +1,209 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+f,+d \
+; RUN:     -target-abi=ilp32d -verify-machineinstrs | FileCheck %s --check-prefix=RV32
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \
+; RUN:     -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \
+; RUN:     -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i64
+
+define <vscale x 1 x iXLen> @lrint_nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_nxv1f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_nxv1f32:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_nxv1f32:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i64-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
+; RV64-i64-NEXT:    vmv1r.v v8, v9
+; RV64-i64-NEXT:    ret
+  %a = call <vscale x 1 x iXLen> @llvm.vp.lrint.nxv1iXLen.nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x iXLen> %a
+}
+declare <vscale x 1 x iXLen> @llvm.vp.lrint.nxv1iXLen.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
+
+define <vscale x 2 x iXLen> @lrint_nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_nxv2f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_nxv2f32:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-i32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_nxv2f32:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-i64-NEXT:    vfwcvt.x.f.v v10, v8, v0.t
+; RV64-i64-NEXT:    vmv2r.v v8, v10
+; RV64-i64-NEXT:    ret
+  %a = call <vscale x 2 x iXLen> @llvm.vp.lrint.nxv2iXLen.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x iXLen> %a
+}
+declare <vscale x 2 x iXLen> @llvm.vp.lrint.nxv2iXLen.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+
+define <vscale x 4 x iXLen> @lrint_nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_nxv4f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_nxv4f32:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV64-i32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_nxv4f32:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV64-i64-NEXT:    vfwcvt.x.f.v v12, v8, v0.t
+; RV64-i64-NEXT:    vmv4r.v v8, v12
+; RV64-i64-NEXT:    ret
+  %a = call <vscale x 4 x iXLen> @llvm.vp.lrint.nxv4iXLen.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x iXLen> %a
+}
+declare <vscale x 4 x iXLen> @llvm.vp.lrint.nxv4iXLen.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
+
+define <vscale x 8 x iXLen> @lrint_nxv8f32(<vscale x 8 x float> %x, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_nxv8f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_nxv8f32:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; RV64-i32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_nxv8f32:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; RV64-i64-NEXT:    vfwcvt.x.f.v v16, v8, v0.t
+; RV64-i64-NEXT:    vmv8r.v v8, v16
+; RV64-i64-NEXT:    ret
+  %a = call <vscale x 8 x iXLen> @llvm.vp.lrint.nxv8iXLen.nxv8f32(<vscale x 8 x float> %x, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x iXLen> %a
+}
+declare <vscale x 8 x iXLen> @llvm.vp.lrint.nxv8iXLen.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
+
+define <vscale x 16 x iXLen> @lrint_nxv16iXLen_nxv16f32(<vscale x 16 x float> %x, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+  %a = call <vscale x 16 x iXLen> @llvm.vp.lrint.nxv16iXLen.nxv16f32(<vscale x 16 x float> %x, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x iXLen> %a
+}
+declare <vscale x 16 x iXLen> @llvm.vp.lrint.nxv16iXLen.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
+
+define <vscale x 1 x iXLen> @lrint_nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_nxv1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_nxv1f64:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-i32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
+; RV64-i32-NEXT:    vmv1r.v v8, v9
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_nxv1f64:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-i64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i64-NEXT:    ret
+  %a = call <vscale x 1 x iXLen> @llvm.vp.lrint.nxv1iXLen.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x iXLen> %a
+}
+declare <vscale x 1 x iXLen> @llvm.vp.lrint.nxv1iXLen.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
+
+define <vscale x 2 x iXLen> @lrint_nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_nxv2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vfncvt.x.f.w v10, v8, v0.t
+; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_nxv2f64:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-i32-NEXT:    vfncvt.x.f.w v10, v8, v0.t
+; RV64-i32-NEXT:    vmv.v.v v8, v10
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_nxv2f64:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-i64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i64-NEXT:    ret
+  %a = call <vscale x 2 x iXLen> @llvm.vp.lrint.nxv2iXLen.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x iXLen> %a
+}
+declare <vscale x 2 x iXLen> @llvm.vp.lrint.nxv2iXLen.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
+
+define <vscale x 4 x iXLen> @lrint_nxv4f64(<vscale x 4 x double> %x, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_nxv4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT:    vfncvt.x.f.w v12, v8, v0.t
+; RV32-NEXT:    vmv.v.v v8, v12
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_nxv4f64:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV64-i32-NEXT:    vfncvt.x.f.w v12, v8, v0.t
+; RV64-i32-NEXT:    vmv.v.v v8, v12
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_nxv4f64:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV64-i64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i64-NEXT:    ret
+  %a = call <vscale x 4 x iXLen> @llvm.vp.lrint.nxv4iXLen.nxv4f64(<vscale x 4 x double> %x, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x iXLen> %a
+}
+declare <vscale x 4 x iXLen> @llvm.vp.lrint.nxv4iXLen.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
+
+define <vscale x 8 x iXLen> @lrint_nxv8f64(<vscale x 8 x double> %x, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; RV32-NEXT:    vfncvt.x.f.w v16, v8, v0.t
+; RV32-NEXT:    vmv.v.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-i32-LABEL: lrint_nxv8f64:
+; RV64-i32:       # %bb.0:
+; RV64-i32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; RV64-i32-NEXT:    vfncvt.x.f.w v16, v8, v0.t
+; RV64-i32-NEXT:    vmv.v.v v8, v16
+; RV64-i32-NEXT:    ret
+;
+; RV64-i64-LABEL: lrint_nxv8f64:
+; RV64-i64:       # %bb.0:
+; RV64-i64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-i64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
+; RV64-i64-NEXT:    ret
+  %a = call <vscale x 8 x iXLen> @llvm.vp.lrint.nxv8iXLen.nxv8f64(<vscale x 8 x double> %x, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x iXLen> %a
+}
+declare <vscale x 8 x iXLen> @llvm.vp.lrint.nxv8iXLen.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
index 7a9d91cc76b34a..a93eb4de2c948e 100644
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -72,6 +72,10 @@ class VPIntrinsicTest : public testing::Test {
            "i32)";
     Str << " declare <8 x float> @llvm.vp.ceil.v8f32(<8 x float>, <8 x i1>, "
            "i32)";
+    Str << " declare <8 x i32> @llvm.vp.lrint.v8i32.v8f32(<8 x float>, "
+           "<8 x i1>, i32)";
+    Str << " declare <8 x i64> @llvm.vp.llrint.v8i64.v8f32(<8 x float>, "
+           "<8 x i1>, i32)";
     Str << " declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, "
            "i32)";
     Str << " declare <8 x float> @llvm.vp.fabs.v8f32(<8 x float>, <8 x i1>, "