[llvm] [RISCV] Be more aggressive about forming floating point constants (PR #68433)

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 26 13:41:04 PDT 2023


https://github.com/preames updated https://github.com/llvm/llvm-project/pull/68433

From 1cb8598859d910942d945f43d8bebc567c72015a Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Wed, 25 Oct 2023 14:52:01 -0700
Subject: [PATCH 1/4] Revert "Revert "[RISCV] Shrink vslideup's LMUL when
 lowering fixed insert_subvector  (#65997)""

This reverts commit 3a6cc52fe3501b2b7b3aabdff305a18122c9e0db.
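
For reference, the effect of the reapplied change is visible directly in the updated test checks below: a small fixed-length insert near the front of an m4 register group now runs its copy/slideup at a smaller LMUL. One before/after pair from fixed-vectors-insert-subvector.ll:

  Before: vsetivli zero, 2, e32, m4, tu, ma
  After:  vsetivli zero, 2, e32, m1, tu, ma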
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  17 +
 .../rvv/fixed-vectors-insert-subvector.ll     |  45 ++-
 .../rvv/fixed-vectors-strided-load-combine.ll |  80 ++---
 .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll     | 330 ++++++++----------
 4 files changed, 228 insertions(+), 244 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index beb371063f89b2d..6c56236241d5c55 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8920,6 +8920,17 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
       return DAG.getBitcast(Op.getValueType(), SubVec);
     }
 
+    // Shrink down Vec so we're performing the slideup on a smaller LMUL.
+    unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
+    MVT OrigContainerVT = ContainerVT;
+    SDValue OrigVec = Vec;
+    if (auto ShrunkVT =
+            getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
+      ContainerVT = *ShrunkVT;
+      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
+                        DAG.getVectorIdxConstant(0, DL));
+    }
+
     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SubVec,
                          DAG.getConstant(0, DL, XLenVT));
@@ -8946,6 +8957,12 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                            SlideupAmt, Mask, VL, Policy);
     }
 
+    // If we performed the slideup on a smaller LMUL, insert the result back
+    // into the rest of the vector.
+    if (ContainerVT != OrigContainerVT)
+      SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+                           SubVec, DAG.getVectorIdxConstant(0, DL));
+
     if (VecVT.isFixedLengthVector())
       SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
     return DAG.getBitcast(Op.getValueType(), SubVec);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
index a77c49c942561b6..15669f03e893d74 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -14,7 +14,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, ptr %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vsetivli zero, 2, e32, m4, tu, ma
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %sv = load <2 x i32>, ptr %svp
@@ -27,7 +27,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_2(<vscale x 8 x i32> %vec, ptr %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vsetivli zero, 4, e32, m4, tu, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v12, 2
 ; CHECK-NEXT:    ret
   %sv = load <2 x i32>, ptr %svp
@@ -40,7 +40,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, ptr %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e32, m4, tu, ma
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v12, 6
 ; CHECK-NEXT:    ret
   %sv = load <2 x i32>, ptr %svp
@@ -51,22 +51,19 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, ptr %
 define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, ptr %svp) {
 ; LMULMAX2-LABEL: insert_nxv8i32_v8i32_0:
 ; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v12, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m4, tu, ma
-; LMULMAX2-NEXT:    vmv.v.v v8, v12
+; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, tu, ma
+; LMULMAX2-NEXT:    vle32.v v8, (a0)
 ; LMULMAX2-NEXT:    ret
 ;
 ; LMULMAX1-LABEL: insert_nxv8i32_v8i32_0:
 ; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, a0, 16
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v12, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v16, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m4, tu, ma
-; LMULMAX1-NEXT:    vmv.v.v v8, v12
-; LMULMAX1-NEXT:    vsetivli zero, 8, e32, m4, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v16, 4
+; LMULMAX1-NEXT:    vle32.v v12, (a1)
+; LMULMAX1-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
+; LMULMAX1-NEXT:    vle32.v v8, (a0)
+; LMULMAX1-NEXT:    vsetivli zero, 8, e32, m2, tu, ma
+; LMULMAX1-NEXT:    vslideup.vi v8, v12, 4
 ; LMULMAX1-NEXT:    ret
   %sv = load <8 x i32>, ptr %svp
   %v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 0)
@@ -84,14 +81,14 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, ptr %
 ;
 ; LMULMAX1-LABEL: insert_nxv8i32_v8i32_8:
 ; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a1, a0, 16
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v12, (a1)
+; LMULMAX1-NEXT:    vle32.v v12, (a0)
+; LMULMAX1-NEXT:    addi a0, a0, 16
 ; LMULMAX1-NEXT:    vle32.v v16, (a0)
 ; LMULMAX1-NEXT:    vsetivli zero, 12, e32, m4, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v16, 8
+; LMULMAX1-NEXT:    vslideup.vi v8, v12, 8
 ; LMULMAX1-NEXT:    vsetivli zero, 16, e32, m4, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v12, 12
+; LMULMAX1-NEXT:    vslideup.vi v8, v16, 12
 ; LMULMAX1-NEXT:    ret
   %sv = load <8 x i32>, ptr %svp
   %v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 8)
@@ -166,7 +163,7 @@ define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) {
 ; LMULMAX2-NEXT:    vle32.v v8, (a1)
 ; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; LMULMAX2-NEXT:    vle32.v v10, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
+; LMULMAX2-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
 ; LMULMAX2-NEXT:    vmv.v.v v10, v8
 ; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; LMULMAX2-NEXT:    vse32.v v10, (a0)
@@ -197,7 +194,7 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) {
 ; LMULMAX2-NEXT:    vle32.v v8, (a1)
 ; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; LMULMAX2-NEXT:    vle32.v v10, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
+; LMULMAX2-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
 ; LMULMAX2-NEXT:    vslideup.vi v10, v8, 2
 ; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; LMULMAX2-NEXT:    vse32.v v10, (a0)
@@ -508,9 +505,9 @@ define void @insert_v2i64_nxv16i64(ptr %psv0, ptr %psv1, <vscale x 16 x i64>* %o
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v16, (a1)
-; CHECK-NEXT:    vsetivli zero, 6, e64, m8, tu, ma
-; CHECK-NEXT:    vslideup.vi v8, v16, 4
+; CHECK-NEXT:    vle64.v v12, (a1)
+; CHECK-NEXT:    vsetivli zero, 6, e64, m4, tu, ma
+; CHECK-NEXT:    vslideup.vi v8, v12, 4
 ; CHECK-NEXT:    vs8r.v v8, (a2)
 ; CHECK-NEXT:    ret
   %sv0 = load <2 x i64>, ptr %psv0
@@ -539,7 +536,7 @@ define void @insert_v2i64_nxv16i64_lo2(ptr %psv, <vscale x 16 x i64>* %out) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 4, e64, m8, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vslideup.vi v16, v8, 2
 ; CHECK-NEXT:    vs8r.v v16, (a1)
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
index f52ba6f51d5c897..805557905117add 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
@@ -27,13 +27,13 @@ define void @widen_3xv4i16(ptr %x, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    addi a2, a0, 8
-; CHECK-NEXT:    vle16.v v10, (a2)
+; CHECK-NEXT:    vle16.v v9, (a2)
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e16, m2, tu, ma
-; CHECK-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 4
 ; CHECK-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
-; CHECK-NEXT:    vslideup.vi v8, v12, 8
+; CHECK-NEXT:    vslideup.vi v8, v10, 8
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <4 x i16>, ptr %x
@@ -75,17 +75,17 @@ define void @widen_4xv4i16_unaligned(ptr %x, ptr %z) {
 ; CHECK-NO-MISALIGN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NO-MISALIGN-NEXT:    vle8.v v8, (a0)
 ; CHECK-NO-MISALIGN-NEXT:    addi a2, a0, 8
-; CHECK-NO-MISALIGN-NEXT:    vle8.v v10, (a2)
+; CHECK-NO-MISALIGN-NEXT:    vle8.v v9, (a2)
 ; CHECK-NO-MISALIGN-NEXT:    addi a2, a0, 16
-; CHECK-NO-MISALIGN-NEXT:    vle8.v v12, (a2)
+; CHECK-NO-MISALIGN-NEXT:    vle8.v v10, (a2)
 ; CHECK-NO-MISALIGN-NEXT:    addi a0, a0, 24
-; CHECK-NO-MISALIGN-NEXT:    vle8.v v14, (a0)
-; CHECK-NO-MISALIGN-NEXT:    vsetivli zero, 8, e16, m2, tu, ma
-; CHECK-NO-MISALIGN-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NO-MISALIGN-NEXT:    vle8.v v12, (a0)
+; CHECK-NO-MISALIGN-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
+; CHECK-NO-MISALIGN-NEXT:    vslideup.vi v8, v9, 4
 ; CHECK-NO-MISALIGN-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
-; CHECK-NO-MISALIGN-NEXT:    vslideup.vi v8, v12, 8
+; CHECK-NO-MISALIGN-NEXT:    vslideup.vi v8, v10, 8
 ; CHECK-NO-MISALIGN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NO-MISALIGN-NEXT:    vslideup.vi v8, v14, 12
+; CHECK-NO-MISALIGN-NEXT:    vslideup.vi v8, v12, 12
 ; CHECK-NO-MISALIGN-NEXT:    vse16.v v8, (a1)
 ; CHECK-NO-MISALIGN-NEXT:    ret
 ;
@@ -188,17 +188,17 @@ define void @strided_constant_mismatch_4xv4i16(ptr %x, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    addi a2, a0, 2
-; CHECK-NEXT:    vle16.v v10, (a2)
+; CHECK-NEXT:    vle16.v v9, (a2)
 ; CHECK-NEXT:    addi a2, a0, 6
-; CHECK-NEXT:    vle16.v v12, (a2)
+; CHECK-NEXT:    vle16.v v10, (a2)
 ; CHECK-NEXT:    addi a0, a0, 8
-; CHECK-NEXT:    vle16.v v14, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e16, m2, tu, ma
-; CHECK-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 4
 ; CHECK-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
-; CHECK-NEXT:    vslideup.vi v8, v12, 8
+; CHECK-NEXT:    vslideup.vi v8, v10, 8
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vslideup.vi v8, v14, 12
+; CHECK-NEXT:    vslideup.vi v8, v12, 12
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <4 x i16>, ptr %x
@@ -258,17 +258,17 @@ define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) {
 ; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; RV32-NEXT:    vle16.v v8, (a0)
 ; RV32-NEXT:    add a0, a0, a2
-; RV32-NEXT:    vle16.v v10, (a0)
+; RV32-NEXT:    vle16.v v9, (a0)
 ; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    vle16.v v12, (a0)
+; RV32-NEXT:    vle16.v v10, (a0)
 ; RV32-NEXT:    add a0, a0, a2
-; RV32-NEXT:    vle16.v v14, (a0)
-; RV32-NEXT:    vsetivli zero, 8, e16, m2, tu, ma
-; RV32-NEXT:    vslideup.vi v8, v10, 4
+; RV32-NEXT:    vle16.v v12, (a0)
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV32-NEXT:    vslideup.vi v8, v9, 4
 ; RV32-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
-; RV32-NEXT:    vslideup.vi v8, v12, 8
+; RV32-NEXT:    vslideup.vi v8, v10, 8
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT:    vslideup.vi v8, v14, 12
+; RV32-NEXT:    vslideup.vi v8, v12, 12
 ; RV32-NEXT:    vse16.v v8, (a1)
 ; RV32-NEXT:    ret
 ;
@@ -277,17 +277,17 @@ define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) {
 ; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; RV64-NEXT:    vle16.v v8, (a0)
 ; RV64-NEXT:    add a0, a0, a2
-; RV64-NEXT:    vle16.v v10, (a0)
+; RV64-NEXT:    vle16.v v9, (a0)
 ; RV64-NEXT:    add a0, a0, a3
-; RV64-NEXT:    vle16.v v12, (a0)
+; RV64-NEXT:    vle16.v v10, (a0)
 ; RV64-NEXT:    add a0, a0, a2
-; RV64-NEXT:    vle16.v v14, (a0)
-; RV64-NEXT:    vsetivli zero, 8, e16, m2, tu, ma
-; RV64-NEXT:    vslideup.vi v8, v10, 4
+; RV64-NEXT:    vle16.v v12, (a0)
+; RV64-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV64-NEXT:    vslideup.vi v8, v9, 4
 ; RV64-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
-; RV64-NEXT:    vslideup.vi v8, v12, 8
+; RV64-NEXT:    vslideup.vi v8, v10, 8
 ; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT:    vslideup.vi v8, v14, 12
+; RV64-NEXT:    vslideup.vi v8, v12, 12
 ; RV64-NEXT:    vse16.v v8, (a1)
 ; RV64-NEXT:    ret
 ;
@@ -296,17 +296,17 @@ define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) {
 ; ZVE64F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVE64F-NEXT:    vle16.v v8, (a0)
 ; ZVE64F-NEXT:    add a0, a0, a2
-; ZVE64F-NEXT:    vle16.v v10, (a0)
+; ZVE64F-NEXT:    vle16.v v9, (a0)
 ; ZVE64F-NEXT:    add a0, a0, a3
-; ZVE64F-NEXT:    vle16.v v12, (a0)
+; ZVE64F-NEXT:    vle16.v v10, (a0)
 ; ZVE64F-NEXT:    add a0, a0, a2
-; ZVE64F-NEXT:    vle16.v v14, (a0)
-; ZVE64F-NEXT:    vsetivli zero, 8, e16, m2, tu, ma
-; ZVE64F-NEXT:    vslideup.vi v8, v10, 4
+; ZVE64F-NEXT:    vle16.v v12, (a0)
+; ZVE64F-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; ZVE64F-NEXT:    vslideup.vi v8, v9, 4
 ; ZVE64F-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
-; ZVE64F-NEXT:    vslideup.vi v8, v12, 8
+; ZVE64F-NEXT:    vslideup.vi v8, v10, 8
 ; ZVE64F-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVE64F-NEXT:    vslideup.vi v8, v14, 12
+; ZVE64F-NEXT:    vslideup.vi v8, v12, 12
 ; ZVE64F-NEXT:    vse16.v v8, (a1)
 ; ZVE64F-NEXT:    ret
   %a = load <4 x i16>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 7497051027fa372..6cfa504b501bacb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -460,54 +460,49 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
 ; CHECK-V-NEXT:    sub sp, sp, a1
 ; CHECK-V-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb
 ; CHECK-V-NEXT:    lhu s0, 24(a0)
-; CHECK-V-NEXT:    lhu s1, 16(a0)
-; CHECK-V-NEXT:    lhu s2, 0(a0)
-; CHECK-V-NEXT:    lhu a0, 8(a0)
+; CHECK-V-NEXT:    lhu s1, 0(a0)
+; CHECK-V-NEXT:    lhu s2, 8(a0)
+; CHECK-V-NEXT:    lhu a0, 16(a0)
 ; CHECK-V-NEXT:    fmv.w.x fa0, a0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT:    fmv.w.x fa0, s2
-; CHECK-V-NEXT:    call __extendhfsf2 at plt
-; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
 ; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT:    fmv.w.x fa0, s2
+; CHECK-V-NEXT:    call __extendhfsf2 at plt
+; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    fmv.w.x fa0, s1
+; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
+; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
-; CHECK-V-NEXT:    csrr a0, vlenb
-; CHECK-V-NEXT:    slli a0, a0, 1
-; CHECK-V-NEXT:    add a0, sp, a0
-; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vs2r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    fmv.w.x fa0, s0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    csrr a0, vlenb
-; CHECK-V-NEXT:    slli a0, a0, 1
-; CHECK-V-NEXT:    add a0, sp, a0
-; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 3
 ; CHECK-V-NEXT:    lui a0, 524288
@@ -632,54 +627,49 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-V-NEXT:    sub sp, sp, a1
 ; CHECK-V-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb
 ; CHECK-V-NEXT:    lhu s0, 24(a0)
-; CHECK-V-NEXT:    lhu s1, 16(a0)
-; CHECK-V-NEXT:    lhu s2, 0(a0)
-; CHECK-V-NEXT:    lhu a0, 8(a0)
+; CHECK-V-NEXT:    lhu s1, 0(a0)
+; CHECK-V-NEXT:    lhu s2, 8(a0)
+; CHECK-V-NEXT:    lhu a0, 16(a0)
 ; CHECK-V-NEXT:    fmv.w.x fa0, a0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT:    fmv.w.x fa0, s2
-; CHECK-V-NEXT:    call __extendhfsf2 at plt
-; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
 ; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT:    fmv.w.x fa0, s2
+; CHECK-V-NEXT:    call __extendhfsf2 at plt
+; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    fmv.w.x fa0, s1
+; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
+; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
-; CHECK-V-NEXT:    csrr a0, vlenb
-; CHECK-V-NEXT:    slli a0, a0, 1
-; CHECK-V-NEXT:    add a0, sp, a0
-; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vs2r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    fmv.w.x fa0, s0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    csrr a0, vlenb
-; CHECK-V-NEXT:    slli a0, a0, 1
-; CHECK-V-NEXT:    add a0, sp, a0
-; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 3
 ; CHECK-V-NEXT:    li a0, -1
@@ -813,54 +803,49 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-V-NEXT:    sub sp, sp, a1
 ; CHECK-V-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb
 ; CHECK-V-NEXT:    lhu s0, 24(a0)
-; CHECK-V-NEXT:    lhu s1, 16(a0)
-; CHECK-V-NEXT:    lhu s2, 0(a0)
-; CHECK-V-NEXT:    lhu a0, 8(a0)
+; CHECK-V-NEXT:    lhu s1, 0(a0)
+; CHECK-V-NEXT:    lhu s2, 8(a0)
+; CHECK-V-NEXT:    lhu a0, 16(a0)
 ; CHECK-V-NEXT:    fmv.w.x fa0, a0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT:    fmv.w.x fa0, s2
-; CHECK-V-NEXT:    call __extendhfsf2 at plt
-; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
 ; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT:    fmv.w.x fa0, s2
+; CHECK-V-NEXT:    call __extendhfsf2 at plt
+; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    fmv.w.x fa0, s1
+; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
+; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
-; CHECK-V-NEXT:    csrr a0, vlenb
-; CHECK-V-NEXT:    slli a0, a0, 1
-; CHECK-V-NEXT:    add a0, sp, a0
-; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vs2r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    fmv.w.x fa0, s0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    csrr a0, vlenb
-; CHECK-V-NEXT:    slli a0, a0, 1
-; CHECK-V-NEXT:    add a0, sp, a0
-; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 3
 ; CHECK-V-NEXT:    li a0, -1
@@ -1454,8 +1439,8 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    lhu s3, 32(a0)
 ; CHECK-V-NEXT:    lhu s4, 24(a0)
 ; CHECK-V-NEXT:    lhu s5, 16(a0)
-; CHECK-V-NEXT:    lhu s6, 0(a0)
-; CHECK-V-NEXT:    lhu a0, 8(a0)
+; CHECK-V-NEXT:    lhu s6, 8(a0)
+; CHECK-V-NEXT:    lhu a0, 0(a0)
 ; CHECK-V-NEXT:    fmv.w.x fa0, a0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
@@ -1466,16 +1451,16 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    fmv.w.x fa0, s6
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
-; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vs2r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    fmv.w.x fa0, s5
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
@@ -1484,7 +1469,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    fmv.w.x fa0, s4
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
@@ -1737,8 +1722,8 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    lhu s3, 32(a0)
 ; CHECK-V-NEXT:    lhu s4, 24(a0)
 ; CHECK-V-NEXT:    lhu s5, 16(a0)
-; CHECK-V-NEXT:    lhu s6, 0(a0)
-; CHECK-V-NEXT:    lhu a0, 8(a0)
+; CHECK-V-NEXT:    lhu s6, 8(a0)
+; CHECK-V-NEXT:    lhu a0, 0(a0)
 ; CHECK-V-NEXT:    fmv.w.x fa0, a0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
@@ -1749,16 +1734,16 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    fmv.w.x fa0, s6
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
-; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vs2r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    fmv.w.x fa0, s5
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
@@ -1767,7 +1752,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    fmv.w.x fa0, s4
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
@@ -2040,8 +2025,8 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    lhu s3, 32(a0)
 ; CHECK-V-NEXT:    lhu s4, 24(a0)
 ; CHECK-V-NEXT:    lhu s5, 16(a0)
-; CHECK-V-NEXT:    lhu s6, 0(a0)
-; CHECK-V-NEXT:    lhu a0, 8(a0)
+; CHECK-V-NEXT:    lhu s6, 8(a0)
+; CHECK-V-NEXT:    lhu a0, 0(a0)
 ; CHECK-V-NEXT:    fmv.w.x fa0, a0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
@@ -2052,16 +2037,16 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    fmv.w.x fa0, s6
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
-; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vs2r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    fmv.w.x fa0, s5
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
@@ -2070,7 +2055,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    fmv.w.x fa0, s4
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
@@ -3796,54 +3781,49 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
 ; CHECK-V-NEXT:    sub sp, sp, a1
 ; CHECK-V-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb
 ; CHECK-V-NEXT:    lhu s0, 24(a0)
-; CHECK-V-NEXT:    lhu s1, 16(a0)
-; CHECK-V-NEXT:    lhu s2, 0(a0)
-; CHECK-V-NEXT:    lhu a0, 8(a0)
+; CHECK-V-NEXT:    lhu s1, 0(a0)
+; CHECK-V-NEXT:    lhu s2, 8(a0)
+; CHECK-V-NEXT:    lhu a0, 16(a0)
 ; CHECK-V-NEXT:    fmv.w.x fa0, a0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT:    fmv.w.x fa0, s2
-; CHECK-V-NEXT:    call __extendhfsf2 at plt
-; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
 ; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT:    fmv.w.x fa0, s2
+; CHECK-V-NEXT:    call __extendhfsf2 at plt
+; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    fmv.w.x fa0, s1
+; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
+; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
-; CHECK-V-NEXT:    csrr a0, vlenb
-; CHECK-V-NEXT:    slli a0, a0, 1
-; CHECK-V-NEXT:    add a0, sp, a0
-; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vs2r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    fmv.w.x fa0, s0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    csrr a0, vlenb
-; CHECK-V-NEXT:    slli a0, a0, 1
-; CHECK-V-NEXT:    add a0, sp, a0
-; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 3
 ; CHECK-V-NEXT:    lui a0, 524288
@@ -3966,54 +3946,49 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-V-NEXT:    sub sp, sp, a1
 ; CHECK-V-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb
 ; CHECK-V-NEXT:    lhu s0, 24(a0)
-; CHECK-V-NEXT:    lhu s1, 16(a0)
-; CHECK-V-NEXT:    lhu s2, 0(a0)
-; CHECK-V-NEXT:    lhu a0, 8(a0)
+; CHECK-V-NEXT:    lhu s1, 0(a0)
+; CHECK-V-NEXT:    lhu s2, 8(a0)
+; CHECK-V-NEXT:    lhu a0, 16(a0)
 ; CHECK-V-NEXT:    fmv.w.x fa0, a0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT:    fmv.w.x fa0, s2
-; CHECK-V-NEXT:    call __extendhfsf2 at plt
-; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
 ; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT:    fmv.w.x fa0, s2
+; CHECK-V-NEXT:    call __extendhfsf2 at plt
+; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    fmv.w.x fa0, s1
+; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
+; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
-; CHECK-V-NEXT:    csrr a0, vlenb
-; CHECK-V-NEXT:    slli a0, a0, 1
-; CHECK-V-NEXT:    add a0, sp, a0
-; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vs2r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    fmv.w.x fa0, s0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    csrr a0, vlenb
-; CHECK-V-NEXT:    slli a0, a0, 1
-; CHECK-V-NEXT:    add a0, sp, a0
-; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 3
 ; CHECK-V-NEXT:    li a0, -1
@@ -4146,54 +4121,49 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-V-NEXT:    sub sp, sp, a1
 ; CHECK-V-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb
 ; CHECK-V-NEXT:    lhu s0, 24(a0)
-; CHECK-V-NEXT:    lhu s1, 16(a0)
-; CHECK-V-NEXT:    lhu s2, 0(a0)
-; CHECK-V-NEXT:    lhu a0, 8(a0)
+; CHECK-V-NEXT:    lhu s1, 0(a0)
+; CHECK-V-NEXT:    lhu s2, 8(a0)
+; CHECK-V-NEXT:    lhu a0, 16(a0)
 ; CHECK-V-NEXT:    fmv.w.x fa0, a0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT:    fmv.w.x fa0, s2
-; CHECK-V-NEXT:    call __extendhfsf2 at plt
-; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
 ; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT:    fmv.w.x fa0, s2
+; CHECK-V-NEXT:    call __extendhfsf2 at plt
+; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    fmv.w.x fa0, s1
+; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
+; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
-; CHECK-V-NEXT:    csrr a0, vlenb
-; CHECK-V-NEXT:    slli a0, a0, 1
-; CHECK-V-NEXT:    add a0, sp, a0
-; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vs2r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    fmv.w.x fa0, s0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    csrr a0, vlenb
-; CHECK-V-NEXT:    slli a0, a0, 1
-; CHECK-V-NEXT:    add a0, sp, a0
-; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 3
 ; CHECK-V-NEXT:    li a0, -1
@@ -4775,8 +4745,8 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    lhu s3, 32(a0)
 ; CHECK-V-NEXT:    lhu s4, 24(a0)
 ; CHECK-V-NEXT:    lhu s5, 16(a0)
-; CHECK-V-NEXT:    lhu s6, 0(a0)
-; CHECK-V-NEXT:    lhu a0, 8(a0)
+; CHECK-V-NEXT:    lhu s6, 8(a0)
+; CHECK-V-NEXT:    lhu a0, 0(a0)
 ; CHECK-V-NEXT:    fmv.w.x fa0, a0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
@@ -4787,16 +4757,16 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    fmv.w.x fa0, s6
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
-; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vs2r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    fmv.w.x fa0, s5
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
@@ -4805,7 +4775,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    fmv.w.x fa0, s4
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
@@ -5054,8 +5024,8 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    lhu s3, 32(a0)
 ; CHECK-V-NEXT:    lhu s4, 24(a0)
 ; CHECK-V-NEXT:    lhu s5, 16(a0)
-; CHECK-V-NEXT:    lhu s6, 0(a0)
-; CHECK-V-NEXT:    lhu a0, 8(a0)
+; CHECK-V-NEXT:    lhu s6, 8(a0)
+; CHECK-V-NEXT:    lhu a0, 0(a0)
 ; CHECK-V-NEXT:    fmv.w.x fa0, a0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
@@ -5066,16 +5036,16 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    fmv.w.x fa0, s6
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
-; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vs2r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    fmv.w.x fa0, s5
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
@@ -5084,7 +5054,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    fmv.w.x fa0, s4
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
@@ -5356,8 +5326,8 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    lhu s3, 32(a0)
 ; CHECK-V-NEXT:    lhu s4, 24(a0)
 ; CHECK-V-NEXT:    lhu s5, 16(a0)
-; CHECK-V-NEXT:    lhu s6, 0(a0)
-; CHECK-V-NEXT:    lhu a0, 8(a0)
+; CHECK-V-NEXT:    lhu s6, 8(a0)
+; CHECK-V-NEXT:    lhu a0, 0(a0)
 ; CHECK-V-NEXT:    fmv.w.x fa0, a0
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
@@ -5368,16 +5338,16 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    fmv.w.x fa0, s6
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 1
-; CHECK-V-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vs2r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT:    fmv.w.x fa0, s5
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
@@ -5386,7 +5356,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    fmv.w.x fa0, s4
 ; CHECK-V-NEXT:    call __extendhfsf2 at plt
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
-; CHECK-V-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
+; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
 ; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload

From 59c9ef464d90bb3d394bfecc5635bab72beaf1de Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Thu, 5 Oct 2023 09:59:08 -0700
Subject: [PATCH 2/4] [RISCV] Be more aggressive about forming floating point
 constants

We were being very conservative about forming floating point constants via an integer materialization sequence followed by an fmv. With the default threshold of 2, we'd only do so if the bit sequence could be produced with a single instruction (LUI, ADDI, or sometimes BSETI).
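
For example (the pattern appears throughout the test diffs below), a constant whose bit pattern fits a single LUI was already materialized inline under the old threshold, since its cost is 1 (LUI) + 1 (fmv) = 2:

    lui     a1, 815104      # integer bit pattern of the f32 clamp constant
    fmv.w.x fa4, a1         # move the bits into an FP register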

This change removes the separate threshold entirely and ties the floating point expansion costing to the integer costing threshold.  The effect is that the default threshold increases by 2, and more sequences are materialized via integers, avoiding constant pool loads.  This is sufficient to cover all constants of types fp16, bf16, and fp32.  Many f64 constants are covered as well, but not all.
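
Concretely, a bit pattern that needs LUI+ADDI has cost 1 (fmv) + 2 (LUI, ADDI) = 3, which exceeded the old limit of 2 and therefore went to the constant pool. A representative before/after pair from the bfloat-convert.ll update below:

  Before:
    lui     a1, %hi(.LCPI10_0)
    flw     fa5, %lo(.LCPI10_0)(a1)

  After:
    lui     a1, 389120
    addi    a1, a1, -1
    fmv.w.x fa5, a1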

One downside of this change is that double constants for ELEN=64 configurations on rv32 can't be formed via the same integer sequences.  This causes a forking in the test coverage which is more than a tad ugly.  Ideas on how to reduce this, or restructure the tests to avoid it, are more than welcome.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |   10 +-
 llvm/test/CodeGen/RISCV/bfloat-convert.ll     |  195 ++-
 llvm/test/CodeGen/RISCV/bfloat-imm.ll         |    9 +-
 llvm/test/CodeGen/RISCV/calling-conv-half.ll  |   45 +-
 llvm/test/CodeGen/RISCV/codemodel-lowering.ll |   15 +-
 llvm/test/CodeGen/RISCV/double-convert.ll     |   98 +-
 llvm/test/CodeGen/RISCV/double-imm.ll         |   52 +-
 llvm/test/CodeGen/RISCV/double-intrinsics.ll  |   54 +-
 llvm/test/CodeGen/RISCV/double-round-conv.ll  |   45 +-
 llvm/test/CodeGen/RISCV/double-zfa.ll         |  104 +-
 llvm/test/CodeGen/RISCV/float-convert.ll      |   98 +-
 llvm/test/CodeGen/RISCV/float-imm.ll          |   14 +-
 .../CodeGen/RISCV/float-round-conv-sat.ll     |   90 +-
 llvm/test/CodeGen/RISCV/half-convert.ll       |  976 ++++++------
 llvm/test/CodeGen/RISCV/half-imm.ll           |   43 +-
 llvm/test/CodeGen/RISCV/half-intrinsics.ll    |   54 +-
 .../test/CodeGen/RISCV/half-round-conv-sat.ll |  370 +++--
 llvm/test/CodeGen/RISCV/half-round-conv.ll    |  375 ++---
 llvm/test/CodeGen/RISCV/half-zfa-fli.ll       |   60 +-
 .../CodeGen/RISCV/repeated-fp-divisors.ll     |    5 +-
 llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll        |  862 ++++++----
 .../CodeGen/RISCV/rvv/double-round-conv.ll    |  160 +-
 .../RISCV/rvv/fceil-constrained-sdnode.ll     |  234 ++-
 llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll   |  210 ++-
 .../RISCV/rvv/ffloor-constrained-sdnode.ll    |  234 ++-
 llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll  |  210 ++-
 .../RISCV/rvv/fixed-vectors-ceil-vp.ll        |  851 ++++++----
 .../fixed-vectors-fceil-constrained-sdnode.ll |  234 ++-
 ...fixed-vectors-ffloor-constrained-sdnode.ll |  234 ++-
 .../RISCV/rvv/fixed-vectors-floor-vp.ll       | 1403 +++++++++++++----
 ...d-vectors-fnearbyint-constrained-sdnode.ll |   73 +-
 .../RISCV/rvv/fixed-vectors-fp-shuffles.ll    |  112 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll     |  742 +++++++--
 .../RISCV/rvv/fixed-vectors-fp2i-sat.ll       |  110 +-
 .../fixed-vectors-fround-costrained-sdnode.ll |  234 ++-
 .../CodeGen/RISCV/rvv/fixed-vectors-fround.ll |  338 +++-
 ...d-vectors-froundeven-constrained-sdnode.ll |  234 ++-
 .../RISCV/rvv/fixed-vectors-froundeven.ll     |  338 +++-
 ...fixed-vectors-ftrunc-constrained-sdnode.ll |   86 +-
 .../RISCV/rvv/fixed-vectors-nearbyint-vp.ll   |  291 +---
 .../RISCV/rvv/fixed-vectors-rint-vp.ll        |  761 ++++++---
 .../RISCV/rvv/fixed-vectors-round-vp.ll       | 1403 +++++++++++++----
 .../RISCV/rvv/fixed-vectors-roundeven-vp.ll   |  299 +---
 .../RISCV/rvv/fixed-vectors-roundtozero-vp.ll |  299 +---
 llvm/test/CodeGen/RISCV/rvv/floor-vp.ll       |  862 ++++++----
 .../rvv/fnearbyint-constrained-sdnode.ll      |   94 +-
 .../CodeGen/RISCV/rvv/fnearbyint-sdnode.ll    |   86 +-
 llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll     |   36 -
 llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll     |   20 +-
 llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll   |  186 ++-
 .../RISCV/rvv/fround-costrained-sdnode.ll     |  234 ++-
 llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll  |  210 ++-
 .../rvv/froundeven-constrained-sdnode.ll      |   94 +-
 .../CodeGen/RISCV/rvv/froundeven-sdnode.ll    |   86 +-
 .../RISCV/rvv/ftrunc-constrained-sdnode.ll    |   86 +-
 llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll  |   78 +-
 .../test/CodeGen/RISCV/rvv/half-round-conv.ll |  399 +----
 llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll   |  329 +---
 llvm/test/CodeGen/RISCV/rvv/rint-vp.ll        | 1278 ++++++++++++---
 llvm/test/CodeGen/RISCV/rvv/round-vp.ll       |  318 +---
 llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll   |  318 +---
 llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll |  318 +---
 .../test/CodeGen/RISCV/rvv/vfma-vp-combine.ll |   25 -
 .../RISCV/rvv/vreductions-fp-sdnode.ll        |    8 +-
 .../RISCV/rvv/vsetvli-insert-crossbb.ll       |   24 +-
 llvm/test/CodeGen/RISCV/zfbfmin.ll            |    5 +-
 66 files changed, 10507 insertions(+), 7251 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6c56236241d5c55..1049532a8414f0c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -69,12 +69,6 @@ static cl::opt<unsigned> NumRepeatedDivisors(
              "transformation to multiplications by the reciprocal"),
     cl::init(2));
 
-static cl::opt<int>
-    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
-              cl::desc("Give the maximum number of instructions that we will "
-                       "use for creating a floating-point immediate value"),
-              cl::init(2));
-
 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                          const RISCVSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
@@ -2031,10 +2025,10 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
 
   // Building an integer and then converting requires a fmv at the end of
   // the integer sequence.
-  const int Cost =
+  const unsigned Cost =
     1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
                                    Subtarget.getFeatureBits());
-  return Cost <= FPImmCost;
+  return Cost <= Subtarget.getMaxBuildIntsCost();
 }
 
 // TODO: This is very conservative.
diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
index 8a0c4240d161bfb..5debb81e6d47447 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
@@ -55,11 +55,12 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
 ; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK32ZFBFMIN-NEXT:    feq.s a0, fa5, fa5
 ; CHECK32ZFBFMIN-NEXT:    neg a0, a0
-; CHECK32ZFBFMIN-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK32ZFBFMIN-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
 ; CHECK32ZFBFMIN-NEXT:    lui a1, 815104
-; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa3, a1
-; CHECK32ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa3
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK32ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK32ZFBFMIN-NEXT:    lui a1, 290816
+; CHECK32ZFBFMIN-NEXT:    addi a1, a1, -512
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa4, a1
 ; CHECK32ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK32ZFBFMIN-NEXT:    fcvt.w.s a1, fa5, rtz
 ; CHECK32ZFBFMIN-NEXT:    and a0, a0, a1
@@ -71,12 +72,13 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
 ; RV32ID-NEXT:    slli a0, a0, 16
 ; RV32ID-NEXT:    fmv.w.x fa5, a0
 ; RV32ID-NEXT:    feq.s a0, fa5, fa5
-; RV32ID-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV32ID-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
-; RV32ID-NEXT:    lui a1, 815104
-; RV32ID-NEXT:    fmv.w.x fa3, a1
-; RV32ID-NEXT:    fmax.s fa5, fa5, fa3
 ; RV32ID-NEXT:    neg a0, a0
+; RV32ID-NEXT:    lui a1, 815104
+; RV32ID-NEXT:    fmv.w.x fa4, a1
+; RV32ID-NEXT:    fmax.s fa5, fa5, fa4
+; RV32ID-NEXT:    lui a1, 290816
+; RV32ID-NEXT:    addi a1, a1, -512
+; RV32ID-NEXT:    fmv.w.x fa4, a1
 ; RV32ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32ID-NEXT:    fcvt.w.s a1, fa5, rtz
 ; RV32ID-NEXT:    and a0, a0, a1
@@ -86,12 +88,13 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
 ; CHECK64ZFBFMIN:       # %bb.0: # %start
 ; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK64ZFBFMIN-NEXT:    feq.s a0, fa5, fa5
-; CHECK64ZFBFMIN-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK64ZFBFMIN-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
-; CHECK64ZFBFMIN-NEXT:    lui a1, 815104
-; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa3, a1
-; CHECK64ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa3
 ; CHECK64ZFBFMIN-NEXT:    neg a0, a0
+; CHECK64ZFBFMIN-NEXT:    lui a1, 815104
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK64ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK64ZFBFMIN-NEXT:    lui a1, 290816
+; CHECK64ZFBFMIN-NEXT:    addi a1, a1, -512
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa4, a1
 ; CHECK64ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK64ZFBFMIN-NEXT:    fcvt.l.s a1, fa5, rtz
 ; CHECK64ZFBFMIN-NEXT:    and a0, a0, a1
@@ -105,12 +108,13 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    feq.s a0, fa5, fa5
-; RV64ID-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV64ID-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
-; RV64ID-NEXT:    lui a1, 815104
-; RV64ID-NEXT:    fmv.w.x fa3, a1
-; RV64ID-NEXT:    fmax.s fa5, fa5, fa3
 ; RV64ID-NEXT:    neg a0, a0
+; RV64ID-NEXT:    lui a1, 815104
+; RV64ID-NEXT:    fmv.w.x fa4, a1
+; RV64ID-NEXT:    fmax.s fa5, fa5, fa4
+; RV64ID-NEXT:    lui a1, 290816
+; RV64ID-NEXT:    addi a1, a1, -512
+; RV64ID-NEXT:    fmv.w.x fa4, a1
 ; RV64ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64ID-NEXT:    fcvt.l.s a1, fa5, rtz
 ; RV64ID-NEXT:    and a0, a0, a1
@@ -158,12 +162,13 @@ define i16 @fcvt_ui_bf16(bfloat %a) nounwind {
 define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind {
 ; CHECK32ZFBFMIN-LABEL: fcvt_ui_bf16_sat:
 ; CHECK32ZFBFMIN:       # %bb.0: # %start
-; CHECK32ZFBFMIN-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK32ZFBFMIN-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
-; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa3, zero
-; CHECK32ZFBFMIN-NEXT:    fmax.s fa4, fa4, fa3
-; CHECK32ZFBFMIN-NEXT:    fmin.s fa5, fa4, fa5
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; CHECK32ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK32ZFBFMIN-NEXT:    lui a0, 292864
+; CHECK32ZFBFMIN-NEXT:    addi a0, a0, -256
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa4, a0
+; CHECK32ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK32ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; CHECK32ZFBFMIN-NEXT:    ret
 ;
@@ -171,38 +176,41 @@ define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind {
 ; RV32ID:       # %bb.0: # %start
 ; RV32ID-NEXT:    fmv.x.w a0, fa0
 ; RV32ID-NEXT:    slli a0, a0, 16
-; RV32ID-NEXT:    lui a1, %hi(.LCPI3_0)
-; RV32ID-NEXT:    flw fa5, %lo(.LCPI3_0)(a1)
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fmv.w.x fa4, zero
+; RV32ID-NEXT:    fmax.s fa5, fa5, fa4
+; RV32ID-NEXT:    lui a0, 292864
+; RV32ID-NEXT:    addi a0, a0, -256
 ; RV32ID-NEXT:    fmv.w.x fa4, a0
-; RV32ID-NEXT:    fmv.w.x fa3, zero
-; RV32ID-NEXT:    fmax.s fa4, fa4, fa3
-; RV32ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV32ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32ID-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV32ID-NEXT:    ret
 ;
 ; CHECK64ZFBFMIN-LABEL: fcvt_ui_bf16_sat:
 ; CHECK64ZFBFMIN:       # %bb.0: # %start
-; CHECK64ZFBFMIN-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK64ZFBFMIN-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
-; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa3, zero
-; CHECK64ZFBFMIN-NEXT:    fmax.s fa4, fa4, fa3
-; CHECK64ZFBFMIN-NEXT:    fmin.s fa5, fa4, fa5
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; CHECK64ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK64ZFBFMIN-NEXT:    lui a0, 292864
+; CHECK64ZFBFMIN-NEXT:    addi a0, a0, -256
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa4, a0
+; CHECK64ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK64ZFBFMIN-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; CHECK64ZFBFMIN-NEXT:    ret
 ;
 ; RV64ID-LABEL: fcvt_ui_bf16_sat:
 ; RV64ID:       # %bb.0: # %start
-; RV64ID-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV64ID-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
 ; RV64ID-NEXT:    slli a0, a0, 48
 ; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fmv.w.x fa4, zero
+; RV64ID-NEXT:    fmax.s fa5, fa5, fa4
+; RV64ID-NEXT:    lui a0, 292864
+; RV64ID-NEXT:    addi a0, a0, -256
 ; RV64ID-NEXT:    fmv.w.x fa4, a0
-; RV64ID-NEXT:    fmv.w.x fa3, zero
-; RV64ID-NEXT:    fmax.s fa4, fa4, fa3
-; RV64ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV64ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; RV64ID-NEXT:    ret
 start:
@@ -492,8 +500,9 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
 ; RV32IZFBFMIN-NEXT:  # %bb.1: # %start
 ; RV32IZFBFMIN-NEXT:    mv a2, a1
 ; RV32IZFBFMIN-NEXT:  .LBB10_2: # %start
-; RV32IZFBFMIN-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32IZFBFMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32IZFBFMIN-NEXT:    lui a1, 389120
+; RV32IZFBFMIN-NEXT:    addi a1, a1, -1
+; RV32IZFBFMIN-NEXT:    fmv.w.x fa5, a1
 ; RV32IZFBFMIN-NEXT:    flt.s a3, fa5, fs0
 ; RV32IZFBFMIN-NEXT:    beqz a3, .LBB10_4
 ; RV32IZFBFMIN-NEXT:  # %bb.3:
@@ -502,9 +511,9 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
 ; RV32IZFBFMIN-NEXT:    feq.s a1, fs0, fs0
 ; RV32IZFBFMIN-NEXT:    neg a4, a1
 ; RV32IZFBFMIN-NEXT:    and a1, a4, a2
+; RV32IZFBFMIN-NEXT:    neg a2, s0
+; RV32IZFBFMIN-NEXT:    and a0, a2, a0
 ; RV32IZFBFMIN-NEXT:    neg a2, a3
-; RV32IZFBFMIN-NEXT:    neg a3, s0
-; RV32IZFBFMIN-NEXT:    and a0, a3, a0
 ; RV32IZFBFMIN-NEXT:    or a0, a2, a0
 ; RV32IZFBFMIN-NEXT:    and a0, a4, a0
 ; RV32IZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -531,8 +540,9 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
 ; R32IDZFBFMIN-NEXT:  # %bb.1: # %start
 ; R32IDZFBFMIN-NEXT:    mv a2, a1
 ; R32IDZFBFMIN-NEXT:  .LBB10_2: # %start
-; R32IDZFBFMIN-NEXT:    lui a1, %hi(.LCPI10_0)
-; R32IDZFBFMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; R32IDZFBFMIN-NEXT:    lui a1, 389120
+; R32IDZFBFMIN-NEXT:    addi a1, a1, -1
+; R32IDZFBFMIN-NEXT:    fmv.w.x fa5, a1
 ; R32IDZFBFMIN-NEXT:    flt.s a3, fa5, fs0
 ; R32IDZFBFMIN-NEXT:    beqz a3, .LBB10_4
 ; R32IDZFBFMIN-NEXT:  # %bb.3:
@@ -541,9 +551,9 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
 ; R32IDZFBFMIN-NEXT:    feq.s a1, fs0, fs0
 ; R32IDZFBFMIN-NEXT:    neg a4, a1
 ; R32IDZFBFMIN-NEXT:    and a1, a4, a2
+; R32IDZFBFMIN-NEXT:    neg a2, s0
+; R32IDZFBFMIN-NEXT:    and a0, a2, a0
 ; R32IDZFBFMIN-NEXT:    neg a2, a3
-; R32IDZFBFMIN-NEXT:    neg a3, s0
-; R32IDZFBFMIN-NEXT:    and a0, a3, a0
 ; R32IDZFBFMIN-NEXT:    or a0, a2, a0
 ; R32IDZFBFMIN-NEXT:    and a0, a4, a0
 ; R32IDZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -572,8 +582,9 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
 ; RV32ID-NEXT:  # %bb.1: # %start
 ; RV32ID-NEXT:    mv a2, a1
 ; RV32ID-NEXT:  .LBB10_2: # %start
-; RV32ID-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32ID-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32ID-NEXT:    lui a1, 389120
+; RV32ID-NEXT:    addi a1, a1, -1
+; RV32ID-NEXT:    fmv.w.x fa5, a1
 ; RV32ID-NEXT:    flt.s a3, fa5, fs0
 ; RV32ID-NEXT:    beqz a3, .LBB10_4
 ; RV32ID-NEXT:  # %bb.3:
@@ -665,30 +676,59 @@ define i64 @fcvt_lu_bf16(bfloat %a) nounwind {
 }
 
 define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
-; CHECK32ZFBFMIN-LABEL: fcvt_lu_bf16_sat:
-; CHECK32ZFBFMIN:       # %bb.0: # %start
-; CHECK32ZFBFMIN-NEXT:    addi sp, sp, -16
-; CHECK32ZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK32ZFBFMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; CHECK32ZFBFMIN-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; CHECK32ZFBFMIN-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK32ZFBFMIN-NEXT:    flw fa5, %lo(.LCPI12_0)(a0)
-; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa0, fa0
-; CHECK32ZFBFMIN-NEXT:    flt.s a0, fa5, fa0
-; CHECK32ZFBFMIN-NEXT:    neg s0, a0
-; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa5, zero
-; CHECK32ZFBFMIN-NEXT:    fle.s a0, fa5, fa0
-; CHECK32ZFBFMIN-NEXT:    neg s1, a0
-; CHECK32ZFBFMIN-NEXT:    call __fixunssfdi@plt
-; CHECK32ZFBFMIN-NEXT:    and a0, s1, a0
-; CHECK32ZFBFMIN-NEXT:    or a0, s0, a0
-; CHECK32ZFBFMIN-NEXT:    and a1, s1, a1
-; CHECK32ZFBFMIN-NEXT:    or a1, s0, a1
-; CHECK32ZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK32ZFBFMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; CHECK32ZFBFMIN-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; CHECK32ZFBFMIN-NEXT:    addi sp, sp, 16
-; CHECK32ZFBFMIN-NEXT:    ret
+; RV32IZFBFMIN-LABEL: fcvt_lu_bf16_sat:
+; RV32IZFBFMIN:       # %bb.0: # %start
+; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV32IZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fs0, fa0
+; RV32IZFBFMIN-NEXT:    fmv.w.x fa5, zero
+; RV32IZFBFMIN-NEXT:    fle.s a0, fa5, fs0
+; RV32IZFBFMIN-NEXT:    neg s0, a0
+; RV32IZFBFMIN-NEXT:    fmv.s fa0, fs0
+; RV32IZFBFMIN-NEXT:    call __fixunssfdi@plt
+; RV32IZFBFMIN-NEXT:    and a0, s0, a0
+; RV32IZFBFMIN-NEXT:    lui a2, 391168
+; RV32IZFBFMIN-NEXT:    addi a2, a2, -1
+; RV32IZFBFMIN-NEXT:    fmv.w.x fa5, a2
+; RV32IZFBFMIN-NEXT:    flt.s a2, fa5, fs0
+; RV32IZFBFMIN-NEXT:    neg a2, a2
+; RV32IZFBFMIN-NEXT:    or a0, a2, a0
+; RV32IZFBFMIN-NEXT:    and a1, s0, a1
+; RV32IZFBFMIN-NEXT:    or a1, a2, a1
+; RV32IZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    ret
+;
+; R32IDZFBFMIN-LABEL: fcvt_lu_bf16_sat:
+; R32IDZFBFMIN:       # %bb.0: # %start
+; R32IDZFBFMIN-NEXT:    addi sp, sp, -16
+; R32IDZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; R32IDZFBFMIN-NEXT:    fcvt.s.bf16 fs0, fa0
+; R32IDZFBFMIN-NEXT:    fmv.w.x fa5, zero
+; R32IDZFBFMIN-NEXT:    fle.s a0, fa5, fs0
+; R32IDZFBFMIN-NEXT:    neg s0, a0
+; R32IDZFBFMIN-NEXT:    fmv.s fa0, fs0
+; R32IDZFBFMIN-NEXT:    call __fixunssfdi@plt
+; R32IDZFBFMIN-NEXT:    and a0, s0, a0
+; R32IDZFBFMIN-NEXT:    lui a2, 391168
+; R32IDZFBFMIN-NEXT:    addi a2, a2, -1
+; R32IDZFBFMIN-NEXT:    fmv.w.x fa5, a2
+; R32IDZFBFMIN-NEXT:    flt.s a2, fa5, fs0
+; R32IDZFBFMIN-NEXT:    neg a2, a2
+; R32IDZFBFMIN-NEXT:    or a0, a2, a0
+; R32IDZFBFMIN-NEXT:    and a1, s0, a1
+; R32IDZFBFMIN-NEXT:    or a1, a2, a1
+; R32IDZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; R32IDZFBFMIN-NEXT:    addi sp, sp, 16
+; R32IDZFBFMIN-NEXT:    ret
 ;
 ; RV32ID-LABEL: fcvt_lu_bf16_sat:
 ; RV32ID:       # %bb.0: # %start
@@ -696,11 +736,12 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
 ; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32ID-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32ID-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32ID-NEXT:    lui a0, %hi(.LCPI12_0)
-; RV32ID-NEXT:    flw fa5, %lo(.LCPI12_0)(a0)
 ; RV32ID-NEXT:    fmv.x.w a0, fa0
 ; RV32ID-NEXT:    slli a0, a0, 16
 ; RV32ID-NEXT:    fmv.w.x fa0, a0
+; RV32ID-NEXT:    lui a0, 391168
+; RV32ID-NEXT:    addi a0, a0, -1
+; RV32ID-NEXT:    fmv.w.x fa5, a0
 ; RV32ID-NEXT:    flt.s a0, fa5, fa0
 ; RV32ID-NEXT:    neg s0, a0
 ; RV32ID-NEXT:    fmv.w.x fa5, zero
diff --git a/llvm/test/CodeGen/RISCV/bfloat-imm.ll b/llvm/test/CodeGen/RISCV/bfloat-imm.ll
index cd4e960b5a062a0..ed2c6f59f8fd576 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-imm.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-imm.ll
@@ -1,14 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -verify-machineinstrs \
-; RUN:   -target-abi ilp32f < %s | FileCheck %s
+; RUN:   -target-abi ilp32f < %s | FileCheck --check-prefixes=CHECK %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin -verify-machineinstrs \
-; RUN:   -target-abi lp64f < %s | FileCheck %s
+; RUN:   -target-abi lp64f < %s | FileCheck --check-prefixes=CHECK %s
 
 define bfloat @bfloat_imm() nounwind {
 ; CHECK-LABEL: bfloat_imm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa0, %lo(.LCPI0_0)(a0)
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    addi a0, a0, 64
+; CHECK-NEXT:    fmv.h.x fa0, a0
 ; CHECK-NEXT:    ret
   ret bfloat 3.0
 }
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
index 6587f0c8c5af7bf..ac0aa7b620c616f 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
@@ -415,8 +415,9 @@ define i32 @caller_half_on_stack() nounwind {
 ; RV32-ILP32F:       # %bb.0:
 ; RV32-ILP32F-NEXT:    addi sp, sp, -16
 ; RV32-ILP32F-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-ILP32F-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32-ILP32F-NEXT:    flw fa0, %lo(.LCPI3_0)(a0)
+; RV32-ILP32F-NEXT:    lui a0, 1048565
+; RV32-ILP32F-NEXT:    addi a0, a0, -1792
+; RV32-ILP32F-NEXT:    fmv.w.x fa0, a0
 ; RV32-ILP32F-NEXT:    li a0, 1
 ; RV32-ILP32F-NEXT:    li a1, 2
 ; RV32-ILP32F-NEXT:    li a2, 3
@@ -434,8 +435,9 @@ define i32 @caller_half_on_stack() nounwind {
 ; RV64-LP64F:       # %bb.0:
 ; RV64-LP64F-NEXT:    addi sp, sp, -16
 ; RV64-LP64F-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-LP64F-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV64-LP64F-NEXT:    flw fa0, %lo(.LCPI3_0)(a0)
+; RV64-LP64F-NEXT:    lui a0, 1048565
+; RV64-LP64F-NEXT:    addi a0, a0, -1792
+; RV64-LP64F-NEXT:    fmv.w.x fa0, a0
 ; RV64-LP64F-NEXT:    li a0, 1
 ; RV64-LP64F-NEXT:    li a1, 2
 ; RV64-LP64F-NEXT:    li a2, 3
@@ -453,8 +455,9 @@ define i32 @caller_half_on_stack() nounwind {
 ; RV32-ILP32ZFHMIN:       # %bb.0:
 ; RV32-ILP32ZFHMIN-NEXT:    addi sp, sp, -16
 ; RV32-ILP32ZFHMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-ILP32ZFHMIN-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32-ILP32ZFHMIN-NEXT:    flh fa0, %lo(.LCPI3_0)(a0)
+; RV32-ILP32ZFHMIN-NEXT:    lui a0, 5
+; RV32-ILP32ZFHMIN-NEXT:    addi a0, a0, -1792
+; RV32-ILP32ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; RV32-ILP32ZFHMIN-NEXT:    li a0, 1
 ; RV32-ILP32ZFHMIN-NEXT:    li a1, 2
 ; RV32-ILP32ZFHMIN-NEXT:    li a2, 3
@@ -472,8 +475,9 @@ define i32 @caller_half_on_stack() nounwind {
 ; RV64-LP64ZFHMIN:       # %bb.0:
 ; RV64-LP64ZFHMIN-NEXT:    addi sp, sp, -16
 ; RV64-LP64ZFHMIN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-LP64ZFHMIN-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV64-LP64ZFHMIN-NEXT:    flh fa0, %lo(.LCPI3_0)(a0)
+; RV64-LP64ZFHMIN-NEXT:    lui a0, 5
+; RV64-LP64ZFHMIN-NEXT:    addi a0, a0, -1792
+; RV64-LP64ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; RV64-LP64ZFHMIN-NEXT:    li a0, 1
 ; RV64-LP64ZFHMIN-NEXT:    li a1, 2
 ; RV64-LP64ZFHMIN-NEXT:    li a2, 3
@@ -511,33 +515,38 @@ define half @callee_half_ret() nounwind {
 ;
 ; RV64IF-LABEL: callee_half_ret:
 ; RV64IF:       # %bb.0:
-; RV64IF-NEXT:    lui a0, %hi(.LCPI4_0)
-; RV64IF-NEXT:    flw fa5, %lo(.LCPI4_0)(a0)
+; RV64IF-NEXT:    lui a0, 1048564
+; RV64IF-NEXT:    addi a0, a0, -1024
+; RV64IF-NEXT:    fmv.w.x fa5, a0
 ; RV64IF-NEXT:    fmv.x.w a0, fa5
 ; RV64IF-NEXT:    ret
 ;
 ; RV32-ILP32F-LABEL: callee_half_ret:
 ; RV32-ILP32F:       # %bb.0:
-; RV32-ILP32F-NEXT:    lui a0, %hi(.LCPI4_0)
-; RV32-ILP32F-NEXT:    flw fa0, %lo(.LCPI4_0)(a0)
+; RV32-ILP32F-NEXT:    lui a0, 1048564
+; RV32-ILP32F-NEXT:    addi a0, a0, -1024
+; RV32-ILP32F-NEXT:    fmv.w.x fa0, a0
 ; RV32-ILP32F-NEXT:    ret
 ;
 ; RV64-LP64F-LABEL: callee_half_ret:
 ; RV64-LP64F:       # %bb.0:
-; RV64-LP64F-NEXT:    lui a0, %hi(.LCPI4_0)
-; RV64-LP64F-NEXT:    flw fa0, %lo(.LCPI4_0)(a0)
+; RV64-LP64F-NEXT:    lui a0, 1048564
+; RV64-LP64F-NEXT:    addi a0, a0, -1024
+; RV64-LP64F-NEXT:    fmv.w.x fa0, a0
 ; RV64-LP64F-NEXT:    ret
 ;
 ; RV32-ILP32ZFHMIN-LABEL: callee_half_ret:
 ; RV32-ILP32ZFHMIN:       # %bb.0:
-; RV32-ILP32ZFHMIN-NEXT:    lui a0, %hi(.LCPI4_0)
-; RV32-ILP32ZFHMIN-NEXT:    flh fa0, %lo(.LCPI4_0)(a0)
+; RV32-ILP32ZFHMIN-NEXT:    li a0, 15
+; RV32-ILP32ZFHMIN-NEXT:    slli a0, a0, 10
+; RV32-ILP32ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; RV32-ILP32ZFHMIN-NEXT:    ret
 ;
 ; RV64-LP64ZFHMIN-LABEL: callee_half_ret:
 ; RV64-LP64ZFHMIN:       # %bb.0:
-; RV64-LP64ZFHMIN-NEXT:    lui a0, %hi(.LCPI4_0)
-; RV64-LP64ZFHMIN-NEXT:    flh fa0, %lo(.LCPI4_0)(a0)
+; RV64-LP64ZFHMIN-NEXT:    li a0, 15
+; RV64-LP64ZFHMIN-NEXT:    slli a0, a0, 10
+; RV64-LP64ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; RV64-LP64ZFHMIN-NEXT:    ret
   ret half 1.0
 }
diff --git a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll
index 617155b31976187..562f2fd0c270cf7 100644
--- a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll
+++ b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll
@@ -124,16 +124,17 @@ indirectgoto:
 define float @lower_constantpool(float %a) nounwind {
 ; RV32I-SMALL-LABEL: lower_constantpool:
 ; RV32I-SMALL:       # %bb.0:
-; RV32I-SMALL-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32I-SMALL-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
+; RV32I-SMALL-NEXT:    lui a0, 260097
+; RV32I-SMALL-NEXT:    addi a0, a0, -2048
+; RV32I-SMALL-NEXT:    fmv.w.x fa5, a0
 ; RV32I-SMALL-NEXT:    fadd.s fa0, fa0, fa5
 ; RV32I-SMALL-NEXT:    ret
 ;
 ; RV32I-MEDIUM-LABEL: lower_constantpool:
 ; RV32I-MEDIUM:       # %bb.0:
-; RV32I-MEDIUM-NEXT:  .Lpcrel_hi3:
-; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(.LCPI3_0)
-; RV32I-MEDIUM-NEXT:    flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0)
+; RV32I-MEDIUM-NEXT:    lui a0, 260097
+; RV32I-MEDIUM-NEXT:    addi a0, a0, -2048
+; RV32I-MEDIUM-NEXT:    fmv.w.x fa5, a0
 ; RV32I-MEDIUM-NEXT:    fadd.s fa0, fa0, fa5
 ; RV32I-MEDIUM-NEXT:    ret
   %1 = fadd float %a, 1.000244140625
@@ -152,9 +153,9 @@ define i32 @lower_extern_weak(i32 %a) nounwind {
 ;
 ; RV32I-MEDIUM-LABEL: lower_extern_weak:
 ; RV32I-MEDIUM:       # %bb.0:
-; RV32I-MEDIUM-NEXT:  .Lpcrel_hi4:
+; RV32I-MEDIUM-NEXT:  .Lpcrel_hi3:
 ; RV32I-MEDIUM-NEXT:    auipc a0, %got_pcrel_hi(W)
-; RV32I-MEDIUM-NEXT:    lw a0, %pcrel_lo(.Lpcrel_hi4)(a0)
+; RV32I-MEDIUM-NEXT:    lw a0, %pcrel_lo(.Lpcrel_hi3)(a0)
 ; RV32I-MEDIUM-NEXT:    lw a0, 0(a0)
 ; RV32I-MEDIUM-NEXT:    ret
   %1 = load volatile i32, ptr @W
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index 39ac963051b5b0b..4472d8a4ac95b38 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -1768,13 +1768,16 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
 ;
 ; RV64IFD-LABEL: fcvt_w_s_sat_i16:
 ; RV64IFD:       # %bb.0: # %start
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI26_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI26_0)(a0)
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI26_1)
-; RV64IFD-NEXT:    fld fa4, %lo(.LCPI26_1)(a0)
 ; RV64IFD-NEXT:    feq.d a0, fa0, fa0
-; RV64IFD-NEXT:    fmax.d fa5, fa0, fa5
 ; RV64IFD-NEXT:    neg a0, a0
+; RV64IFD-NEXT:    li a1, -505
+; RV64IFD-NEXT:    slli a1, a1, 53
+; RV64IFD-NEXT:    fmv.d.x fa5, a1
+; RV64IFD-NEXT:    fmax.d fa5, fa0, fa5
+; RV64IFD-NEXT:    lui a1, 4152
+; RV64IFD-NEXT:    addi a1, a1, -1
+; RV64IFD-NEXT:    slli a1, a1, 38
+; RV64IFD-NEXT:    fmv.d.x fa4, a1
 ; RV64IFD-NEXT:    fmin.d fa5, fa5, fa4
 ; RV64IFD-NEXT:    fcvt.l.d a1, fa5, rtz
 ; RV64IFD-NEXT:    and a0, a0, a1
@@ -1804,16 +1807,17 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: fcvt_w_s_sat_i16:
 ; RV64IZFINXZDINX:       # %bb.0: # %start
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI26_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI26_0)(a1)
-; RV64IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI26_1)
-; RV64IZFINXZDINX-NEXT:    ld a2, %lo(.LCPI26_1)(a2)
-; RV64IZFINXZDINX-NEXT:    fmax.d a1, a0, a1
-; RV64IZFINXZDINX-NEXT:    feq.d a0, a0, a0
-; RV64IZFINXZDINX-NEXT:    neg a0, a0
-; RV64IZFINXZDINX-NEXT:    fmin.d a1, a1, a2
-; RV64IZFINXZDINX-NEXT:    fcvt.l.d a1, a1, rtz
-; RV64IZFINXZDINX-NEXT:    and a0, a0, a1
+; RV64IZFINXZDINX-NEXT:    feq.d a1, a0, a0
+; RV64IZFINXZDINX-NEXT:    neg a1, a1
+; RV64IZFINXZDINX-NEXT:    li a2, -505
+; RV64IZFINXZDINX-NEXT:    slli a2, a2, 53
+; RV64IZFINXZDINX-NEXT:    fmax.d a0, a0, a2
+; RV64IZFINXZDINX-NEXT:    lui a2, 4152
+; RV64IZFINXZDINX-NEXT:    addi a2, a2, -1
+; RV64IZFINXZDINX-NEXT:    slli a2, a2, 38
+; RV64IZFINXZDINX-NEXT:    fmin.d a0, a0, a2
+; RV64IZFINXZDINX-NEXT:    fcvt.l.d a0, a0, rtz
+; RV64IZFINXZDINX-NEXT:    and a0, a1, a0
 ; RV64IZFINXZDINX-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i16:
@@ -1981,11 +1985,13 @@ define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind {
 ;
 ; RV64IFD-LABEL: fcvt_wu_s_sat_i16:
 ; RV64IFD:       # %bb.0: # %start
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI28_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI28_0)(a0)
-; RV64IFD-NEXT:    fmv.d.x fa4, zero
-; RV64IFD-NEXT:    fmax.d fa4, fa0, fa4
-; RV64IFD-NEXT:    fmin.d fa5, fa4, fa5
+; RV64IFD-NEXT:    fmv.d.x fa5, zero
+; RV64IFD-NEXT:    fmax.d fa5, fa0, fa5
+; RV64IFD-NEXT:    lui a0, 8312
+; RV64IFD-NEXT:    addi a0, a0, -1
+; RV64IFD-NEXT:    slli a0, a0, 37
+; RV64IFD-NEXT:    fmv.d.x fa4, a0
+; RV64IFD-NEXT:    fmin.d fa5, fa5, fa4
 ; RV64IFD-NEXT:    fcvt.lu.d a0, fa5, rtz
 ; RV64IFD-NEXT:    ret
 ;
@@ -2008,9 +2014,10 @@ define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: fcvt_wu_s_sat_i16:
 ; RV64IZFINXZDINX:       # %bb.0: # %start
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI28_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI28_0)(a1)
 ; RV64IZFINXZDINX-NEXT:    fmax.d a0, a0, zero
+; RV64IZFINXZDINX-NEXT:    lui a1, 8312
+; RV64IZFINXZDINX-NEXT:    addi a1, a1, -1
+; RV64IZFINXZDINX-NEXT:    slli a1, a1, 37
 ; RV64IZFINXZDINX-NEXT:    fmin.d a0, a0, a1
 ; RV64IZFINXZDINX-NEXT:    fcvt.lu.d a0, a0, rtz
 ; RV64IZFINXZDINX-NEXT:    ret
@@ -2167,13 +2174,15 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
 ;
 ; RV64IFD-LABEL: fcvt_w_s_sat_i8:
 ; RV64IFD:       # %bb.0: # %start
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI30_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI30_0)(a0)
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI30_1)
-; RV64IFD-NEXT:    fld fa4, %lo(.LCPI30_1)(a0)
 ; RV64IFD-NEXT:    feq.d a0, fa0, fa0
-; RV64IFD-NEXT:    fmax.d fa5, fa0, fa5
 ; RV64IFD-NEXT:    neg a0, a0
+; RV64IFD-NEXT:    li a1, -509
+; RV64IFD-NEXT:    slli a1, a1, 53
+; RV64IFD-NEXT:    fmv.d.x fa5, a1
+; RV64IFD-NEXT:    fmax.d fa5, fa0, fa5
+; RV64IFD-NEXT:    lui a1, 65919
+; RV64IFD-NEXT:    slli a1, a1, 34
+; RV64IFD-NEXT:    fmv.d.x fa4, a1
 ; RV64IFD-NEXT:    fmin.d fa5, fa5, fa4
 ; RV64IFD-NEXT:    fcvt.l.d a1, fa5, rtz
 ; RV64IFD-NEXT:    and a0, a0, a1
@@ -2203,16 +2212,16 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: fcvt_w_s_sat_i8:
 ; RV64IZFINXZDINX:       # %bb.0: # %start
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI30_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI30_0)(a1)
-; RV64IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI30_1)
-; RV64IZFINXZDINX-NEXT:    ld a2, %lo(.LCPI30_1)(a2)
-; RV64IZFINXZDINX-NEXT:    fmax.d a1, a0, a1
-; RV64IZFINXZDINX-NEXT:    feq.d a0, a0, a0
-; RV64IZFINXZDINX-NEXT:    neg a0, a0
-; RV64IZFINXZDINX-NEXT:    fmin.d a1, a1, a2
-; RV64IZFINXZDINX-NEXT:    fcvt.l.d a1, a1, rtz
-; RV64IZFINXZDINX-NEXT:    and a0, a0, a1
+; RV64IZFINXZDINX-NEXT:    feq.d a1, a0, a0
+; RV64IZFINXZDINX-NEXT:    neg a1, a1
+; RV64IZFINXZDINX-NEXT:    li a2, -509
+; RV64IZFINXZDINX-NEXT:    slli a2, a2, 53
+; RV64IZFINXZDINX-NEXT:    fmax.d a0, a0, a2
+; RV64IZFINXZDINX-NEXT:    lui a2, 65919
+; RV64IZFINXZDINX-NEXT:    slli a2, a2, 34
+; RV64IZFINXZDINX-NEXT:    fmin.d a0, a0, a2
+; RV64IZFINXZDINX-NEXT:    fcvt.l.d a0, a0, rtz
+; RV64IZFINXZDINX-NEXT:    and a0, a1, a0
 ; RV64IZFINXZDINX-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i8:
@@ -2379,11 +2388,12 @@ define zeroext i8 @fcvt_wu_s_sat_i8(double %a) nounwind {
 ;
 ; RV64IFD-LABEL: fcvt_wu_s_sat_i8:
 ; RV64IFD:       # %bb.0: # %start
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI32_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI32_0)(a0)
-; RV64IFD-NEXT:    fmv.d.x fa4, zero
-; RV64IFD-NEXT:    fmax.d fa4, fa0, fa4
-; RV64IFD-NEXT:    fmin.d fa5, fa4, fa5
+; RV64IFD-NEXT:    fmv.d.x fa5, zero
+; RV64IFD-NEXT:    fmax.d fa5, fa0, fa5
+; RV64IFD-NEXT:    lui a0, 131967
+; RV64IFD-NEXT:    slli a0, a0, 33
+; RV64IFD-NEXT:    fmv.d.x fa4, a0
+; RV64IFD-NEXT:    fmin.d fa5, fa5, fa4
 ; RV64IFD-NEXT:    fcvt.lu.d a0, fa5, rtz
 ; RV64IFD-NEXT:    ret
 ;
@@ -2406,9 +2416,9 @@ define zeroext i8 @fcvt_wu_s_sat_i8(double %a) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: fcvt_wu_s_sat_i8:
 ; RV64IZFINXZDINX:       # %bb.0: # %start
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI32_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI32_0)(a1)
 ; RV64IZFINXZDINX-NEXT:    fmax.d a0, a0, zero
+; RV64IZFINXZDINX-NEXT:    lui a1, 131967
+; RV64IZFINXZDINX-NEXT:    slli a1, a1, 33
 ; RV64IZFINXZDINX-NEXT:    fmin.d a0, a0, a1
 ; RV64IZFINXZDINX-NEXT:    fcvt.lu.d a0, a0, rtz
 ; RV64IZFINXZDINX-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/double-imm.ll b/llvm/test/CodeGen/RISCV/double-imm.ll
index dd554a8ce0dcf74..b42d79ed95ffdd6 100644
--- a/llvm/test/CodeGen/RISCV/double-imm.ll
+++ b/llvm/test/CodeGen/RISCV/double-imm.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \
-; RUN:   -target-abi=ilp32d | FileCheck %s
+; RUN:   -target-abi=ilp32d | FileCheck --check-prefixes=CHECK,RV32 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
-; RUN:   -target-abi=lp64d | FileCheck %s
+; RUN:   -target-abi=lp64d | FileCheck --check-prefixes=CHECK,RV64 %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zdinx -verify-machineinstrs < %s \
 ; RUN:   -target-abi=ilp32 | FileCheck --check-prefix=CHECKRV32ZDINX %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zdinx -verify-machineinstrs < %s \
@@ -31,13 +31,21 @@ define double @double_imm() nounwind {
   ret double 3.1415926535897931159979634685441851615905761718750
 }
 
-define double @double_imm_op(double %a) nounwind {
-; CHECK-LABEL: double_imm_op:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI1_0)(a0)
-; CHECK-NEXT:    fadd.d fa0, fa0, fa5
-; CHECK-NEXT:    ret
+define double @double_imm_op(double %a) nounwind {;
+; RV32-LABEL: double_imm_op:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI1_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI1_0)(a0)
+; RV32-NEXT:    fadd.d fa0, fa0, fa5
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: double_imm_op:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a0, 1023
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    fadd.d fa0, fa0, fa5
+; RV64-NEXT:    ret
 ;
 ; CHECKRV32ZDINX-LABEL: double_imm_op:
 ; CHECKRV32ZDINX:       # %bb.0:
@@ -59,8 +67,8 @@ define double @double_imm_op(double %a) nounwind {
 ;
 ; CHECKRV64ZDINX-LABEL: double_imm_op:
 ; CHECKRV64ZDINX:       # %bb.0:
-; CHECKRV64ZDINX-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECKRV64ZDINX-NEXT:    ld a1, %lo(.LCPI1_0)(a1)
+; CHECKRV64ZDINX-NEXT:    li a1, 1023
+; CHECKRV64ZDINX-NEXT:    slli a1, a1, 52
 ; CHECKRV64ZDINX-NEXT:    fadd.d a0, a0, a1
 ; CHECKRV64ZDINX-NEXT:    ret
   %1 = fadd double %a, 1.0
@@ -68,6 +76,16 @@ define double @double_imm_op(double %a) nounwind {
 }
 
 define double @double_positive_zero(ptr %pd) nounwind {
+; RV32-LABEL: double_positive_zero:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fcvt.d.w fa0, zero
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: double_positive_zero:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fmv.d.x fa0, zero
+; RV64-NEXT:    ret
+;
 ; CHECKRV32ZDINX-LABEL: double_positive_zero:
 ; CHECKRV32ZDINX:       # %bb.0:
 ; CHECKRV32ZDINX-NEXT:    li a0, 0
@@ -82,6 +100,18 @@ define double @double_positive_zero(ptr %pd) nounwind {
 }
 
 define double @double_negative_zero(ptr %pd) nounwind {
+; RV32-LABEL: double_negative_zero:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fcvt.d.w fa5, zero
+; RV32-NEXT:    fneg.d fa0, fa5
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: double_negative_zero:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fmv.d.x fa5, zero
+; RV64-NEXT:    fneg.d fa0, fa5
+; RV64-NEXT:    ret
+;
 ; CHECKRV32ZDINX-LABEL: double_negative_zero:
 ; CHECKRV32ZDINX:       # %bb.0:
 ; CHECKRV32ZDINX-NEXT:    lui a1, 524288
diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/double-intrinsics.ll
index 36268accc8fd04c..d3bcac745ac8caf 100644
--- a/llvm/test/CodeGen/RISCV/double-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/double-intrinsics.ll
@@ -944,8 +944,9 @@ define double @floor_f64(double %a) nounwind {
 ;
 ; RV64IFD-LABEL: floor_f64:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI17_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI17_0)(a0)
+; RV64IFD-NEXT:    li a0, 1075
+; RV64IFD-NEXT:    slli a0, a0, 52
+; RV64IFD-NEXT:    fmv.d.x fa5, a0
 ; RV64IFD-NEXT:    fabs.d fa4, fa0
 ; RV64IFD-NEXT:    flt.d a0, fa4, fa5
 ; RV64IFD-NEXT:    beqz a0, .LBB17_2
@@ -967,8 +968,8 @@ define double @floor_f64(double %a) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: floor_f64:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI17_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI17_0)(a1)
+; RV64IZFINXZDINX-NEXT:    li a1, 1075
+; RV64IZFINXZDINX-NEXT:    slli a1, a1, 52
 ; RV64IZFINXZDINX-NEXT:    fabs.d a2, a0
 ; RV64IZFINXZDINX-NEXT:    flt.d a1, a2, a1
 ; RV64IZFINXZDINX-NEXT:    beqz a1, .LBB17_2
@@ -1009,8 +1010,9 @@ define double @ceil_f64(double %a) nounwind {
 ;
 ; RV64IFD-LABEL: ceil_f64:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI18_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
+; RV64IFD-NEXT:    li a0, 1075
+; RV64IFD-NEXT:    slli a0, a0, 52
+; RV64IFD-NEXT:    fmv.d.x fa5, a0
 ; RV64IFD-NEXT:    fabs.d fa4, fa0
 ; RV64IFD-NEXT:    flt.d a0, fa4, fa5
 ; RV64IFD-NEXT:    beqz a0, .LBB18_2
@@ -1032,8 +1034,8 @@ define double @ceil_f64(double %a) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: ceil_f64:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI18_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI18_0)(a1)
+; RV64IZFINXZDINX-NEXT:    li a1, 1075
+; RV64IZFINXZDINX-NEXT:    slli a1, a1, 52
 ; RV64IZFINXZDINX-NEXT:    fabs.d a2, a0
 ; RV64IZFINXZDINX-NEXT:    flt.d a1, a2, a1
 ; RV64IZFINXZDINX-NEXT:    beqz a1, .LBB18_2
@@ -1074,8 +1076,9 @@ define double @trunc_f64(double %a) nounwind {
 ;
 ; RV64IFD-LABEL: trunc_f64:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI19_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI19_0)(a0)
+; RV64IFD-NEXT:    li a0, 1075
+; RV64IFD-NEXT:    slli a0, a0, 52
+; RV64IFD-NEXT:    fmv.d.x fa5, a0
 ; RV64IFD-NEXT:    fabs.d fa4, fa0
 ; RV64IFD-NEXT:    flt.d a0, fa4, fa5
 ; RV64IFD-NEXT:    beqz a0, .LBB19_2
@@ -1097,8 +1100,8 @@ define double @trunc_f64(double %a) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: trunc_f64:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI19_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI19_0)(a1)
+; RV64IZFINXZDINX-NEXT:    li a1, 1075
+; RV64IZFINXZDINX-NEXT:    slli a1, a1, 52
 ; RV64IZFINXZDINX-NEXT:    fabs.d a2, a0
 ; RV64IZFINXZDINX-NEXT:    flt.d a1, a2, a1
 ; RV64IZFINXZDINX-NEXT:    beqz a1, .LBB19_2
@@ -1139,8 +1142,9 @@ define double @rint_f64(double %a) nounwind {
 ;
 ; RV64IFD-LABEL: rint_f64:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI20_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
+; RV64IFD-NEXT:    li a0, 1075
+; RV64IFD-NEXT:    slli a0, a0, 52
+; RV64IFD-NEXT:    fmv.d.x fa5, a0
 ; RV64IFD-NEXT:    fabs.d fa4, fa0
 ; RV64IFD-NEXT:    flt.d a0, fa4, fa5
 ; RV64IFD-NEXT:    beqz a0, .LBB20_2
@@ -1162,8 +1166,8 @@ define double @rint_f64(double %a) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: rint_f64:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI20_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI20_0)(a1)
+; RV64IZFINXZDINX-NEXT:    li a1, 1075
+; RV64IZFINXZDINX-NEXT:    slli a1, a1, 52
 ; RV64IZFINXZDINX-NEXT:    fabs.d a2, a0
 ; RV64IZFINXZDINX-NEXT:    flt.d a1, a2, a1
 ; RV64IZFINXZDINX-NEXT:    beqz a1, .LBB20_2
@@ -1245,8 +1249,9 @@ define double @round_f64(double %a) nounwind {
 ;
 ; RV64IFD-LABEL: round_f64:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI22_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
+; RV64IFD-NEXT:    li a0, 1075
+; RV64IFD-NEXT:    slli a0, a0, 52
+; RV64IFD-NEXT:    fmv.d.x fa5, a0
 ; RV64IFD-NEXT:    fabs.d fa4, fa0
 ; RV64IFD-NEXT:    flt.d a0, fa4, fa5
 ; RV64IFD-NEXT:    beqz a0, .LBB22_2
@@ -1268,8 +1273,8 @@ define double @round_f64(double %a) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: round_f64:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI22_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI22_0)(a1)
+; RV64IZFINXZDINX-NEXT:    li a1, 1075
+; RV64IZFINXZDINX-NEXT:    slli a1, a1, 52
 ; RV64IZFINXZDINX-NEXT:    fabs.d a2, a0
 ; RV64IZFINXZDINX-NEXT:    flt.d a1, a2, a1
 ; RV64IZFINXZDINX-NEXT:    beqz a1, .LBB22_2
@@ -1310,8 +1315,9 @@ define double @roundeven_f64(double %a) nounwind {
 ;
 ; RV64IFD-LABEL: roundeven_f64:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI23_0)(a0)
+; RV64IFD-NEXT:    li a0, 1075
+; RV64IFD-NEXT:    slli a0, a0, 52
+; RV64IFD-NEXT:    fmv.d.x fa5, a0
 ; RV64IFD-NEXT:    fabs.d fa4, fa0
 ; RV64IFD-NEXT:    flt.d a0, fa4, fa5
 ; RV64IFD-NEXT:    beqz a0, .LBB23_2
@@ -1333,8 +1339,8 @@ define double @roundeven_f64(double %a) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: roundeven_f64:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI23_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI23_0)(a1)
+; RV64IZFINXZDINX-NEXT:    li a1, 1075
+; RV64IZFINXZDINX-NEXT:    slli a1, a1, 52
 ; RV64IZFINXZDINX-NEXT:    fabs.d a2, a0
 ; RV64IZFINXZDINX-NEXT:    flt.d a1, a2, a1
 ; RV64IZFINXZDINX-NEXT:    beqz a1, .LBB23_2
diff --git a/llvm/test/CodeGen/RISCV/double-round-conv.ll b/llvm/test/CodeGen/RISCV/double-round-conv.ll
index 6327afd881a53eb..576c3b7c680e646 100644
--- a/llvm/test/CodeGen/RISCV/double-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/double-round-conv.ll
@@ -1315,8 +1315,9 @@ define double @test_floor_double(double %x) {
 ;
 ; RV64IFD-LABEL: test_floor_double:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI40_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
+; RV64IFD-NEXT:    li a0, 1075
+; RV64IFD-NEXT:    slli a0, a0, 52
+; RV64IFD-NEXT:    fmv.d.x fa5, a0
 ; RV64IFD-NEXT:    fabs.d fa4, fa0
 ; RV64IFD-NEXT:    flt.d a0, fa4, fa5
 ; RV64IFD-NEXT:    beqz a0, .LBB40_2
@@ -1340,8 +1341,8 @@ define double @test_floor_double(double %x) {
 ;
 ; RV64IZFINXZDINX-LABEL: test_floor_double:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI40_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI40_0)(a1)
+; RV64IZFINXZDINX-NEXT:    li a1, 1075
+; RV64IZFINXZDINX-NEXT:    slli a1, a1, 52
 ; RV64IZFINXZDINX-NEXT:    fabs.d a2, a0
 ; RV64IZFINXZDINX-NEXT:    flt.d a1, a2, a1
 ; RV64IZFINXZDINX-NEXT:    beqz a1, .LBB40_2
@@ -1362,8 +1363,9 @@ define double @test_ceil_double(double %x) {
 ;
 ; RV64IFD-LABEL: test_ceil_double:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI41_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI41_0)(a0)
+; RV64IFD-NEXT:    li a0, 1075
+; RV64IFD-NEXT:    slli a0, a0, 52
+; RV64IFD-NEXT:    fmv.d.x fa5, a0
 ; RV64IFD-NEXT:    fabs.d fa4, fa0
 ; RV64IFD-NEXT:    flt.d a0, fa4, fa5
 ; RV64IFD-NEXT:    beqz a0, .LBB41_2
@@ -1387,8 +1389,8 @@ define double @test_ceil_double(double %x) {
 ;
 ; RV64IZFINXZDINX-LABEL: test_ceil_double:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI41_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI41_0)(a1)
+; RV64IZFINXZDINX-NEXT:    li a1, 1075
+; RV64IZFINXZDINX-NEXT:    slli a1, a1, 52
 ; RV64IZFINXZDINX-NEXT:    fabs.d a2, a0
 ; RV64IZFINXZDINX-NEXT:    flt.d a1, a2, a1
 ; RV64IZFINXZDINX-NEXT:    beqz a1, .LBB41_2
@@ -1409,8 +1411,9 @@ define double @test_trunc_double(double %x) {
 ;
 ; RV64IFD-LABEL: test_trunc_double:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI42_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
+; RV64IFD-NEXT:    li a0, 1075
+; RV64IFD-NEXT:    slli a0, a0, 52
+; RV64IFD-NEXT:    fmv.d.x fa5, a0
 ; RV64IFD-NEXT:    fabs.d fa4, fa0
 ; RV64IFD-NEXT:    flt.d a0, fa4, fa5
 ; RV64IFD-NEXT:    beqz a0, .LBB42_2
@@ -1434,8 +1437,8 @@ define double @test_trunc_double(double %x) {
 ;
 ; RV64IZFINXZDINX-LABEL: test_trunc_double:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI42_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI42_0)(a1)
+; RV64IZFINXZDINX-NEXT:    li a1, 1075
+; RV64IZFINXZDINX-NEXT:    slli a1, a1, 52
 ; RV64IZFINXZDINX-NEXT:    fabs.d a2, a0
 ; RV64IZFINXZDINX-NEXT:    flt.d a1, a2, a1
 ; RV64IZFINXZDINX-NEXT:    beqz a1, .LBB42_2
@@ -1456,8 +1459,9 @@ define double @test_round_double(double %x) {
 ;
 ; RV64IFD-LABEL: test_round_double:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI43_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI43_0)(a0)
+; RV64IFD-NEXT:    li a0, 1075
+; RV64IFD-NEXT:    slli a0, a0, 52
+; RV64IFD-NEXT:    fmv.d.x fa5, a0
 ; RV64IFD-NEXT:    fabs.d fa4, fa0
 ; RV64IFD-NEXT:    flt.d a0, fa4, fa5
 ; RV64IFD-NEXT:    beqz a0, .LBB43_2
@@ -1481,8 +1485,8 @@ define double @test_round_double(double %x) {
 ;
 ; RV64IZFINXZDINX-LABEL: test_round_double:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI43_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI43_0)(a1)
+; RV64IZFINXZDINX-NEXT:    li a1, 1075
+; RV64IZFINXZDINX-NEXT:    slli a1, a1, 52
 ; RV64IZFINXZDINX-NEXT:    fabs.d a2, a0
 ; RV64IZFINXZDINX-NEXT:    flt.d a1, a2, a1
 ; RV64IZFINXZDINX-NEXT:    beqz a1, .LBB43_2
@@ -1503,8 +1507,9 @@ define double @test_roundeven_double(double %x) {
 ;
 ; RV64IFD-LABEL: test_roundeven_double:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    lui a0, %hi(.LCPI44_0)
-; RV64IFD-NEXT:    fld fa5, %lo(.LCPI44_0)(a0)
+; RV64IFD-NEXT:    li a0, 1075
+; RV64IFD-NEXT:    slli a0, a0, 52
+; RV64IFD-NEXT:    fmv.d.x fa5, a0
 ; RV64IFD-NEXT:    fabs.d fa4, fa0
 ; RV64IFD-NEXT:    flt.d a0, fa4, fa5
 ; RV64IFD-NEXT:    beqz a0, .LBB44_2
@@ -1528,8 +1533,8 @@ define double @test_roundeven_double(double %x) {
 ;
 ; RV64IZFINXZDINX-LABEL: test_roundeven_double:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI44_0)
-; RV64IZFINXZDINX-NEXT:    ld a1, %lo(.LCPI44_0)(a1)
+; RV64IZFINXZDINX-NEXT:    li a1, 1075
+; RV64IZFINXZDINX-NEXT:    slli a1, a1, 52
 ; RV64IZFINXZDINX-NEXT:    fabs.d a2, a0
 ; RV64IZFINXZDINX-NEXT:    flt.d a1, a2, a1
 ; RV64IZFINXZDINX-NEXT:    beqz a1, .LBB44_2
diff --git a/llvm/test/CodeGen/RISCV/double-zfa.ll b/llvm/test/CodeGen/RISCV/double-zfa.ll
index 904dd9f2ccbef91..cf70c8b329b1287 100644
--- a/llvm/test/CodeGen/RISCV/double-zfa.ll
+++ b/llvm/test/CodeGen/RISCV/double-zfa.ll
@@ -69,41 +69,71 @@ define double @loadfpimm8() {
 }
 
 define double @loadfpimm9() {
-; CHECK-LABEL: loadfpimm9:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
-; CHECK-NEXT:    fld fa0, %lo(.LCPI8_0)(a0)
-; CHECK-NEXT:    ret
+; RV32IDZFA-LABEL: loadfpimm9:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    lui a0, %hi(.LCPI8_0)
+; RV32IDZFA-NEXT:    fld fa0, %lo(.LCPI8_0)(a0)
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: loadfpimm9:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    lui a0, 131967
+; RV64DZFA-NEXT:    slli a0, a0, 33
+; RV64DZFA-NEXT:    fmv.d.x fa0, a0
+; RV64DZFA-NEXT:    ret
   ret double 255.0
 }
 
 ; Negative test. This is 1 * 2^256.
 define double @loadfpimm10() {
-; CHECK-LABEL: loadfpimm10:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
-; CHECK-NEXT:    fld fa0, %lo(.LCPI9_0)(a0)
-; CHECK-NEXT:    ret
+; RV32IDZFA-LABEL: loadfpimm10:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    lui a0, %hi(.LCPI9_0)
+; RV32IDZFA-NEXT:    fld fa0, %lo(.LCPI9_0)(a0)
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: loadfpimm10:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    li a0, 1
+; RV64DZFA-NEXT:    slli a0, a0, 60
+; RV64DZFA-NEXT:    fmv.d.x fa0, a0
+; RV64DZFA-NEXT:    ret
   ret double 0x1000000000000000
 }
 
 ; Negative test. This is a qnan with payload of 1.
 define double @loadfpimm11() {
-; CHECK-LABEL: loadfpimm11:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
-; CHECK-NEXT:    fld fa0, %lo(.LCPI10_0)(a0)
-; CHECK-NEXT:    ret
+; RV32IDZFA-LABEL: loadfpimm11:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    lui a0, %hi(.LCPI10_0)
+; RV32IDZFA-NEXT:    fld fa0, %lo(.LCPI10_0)(a0)
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: loadfpimm11:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    lui a0, 4095
+; RV64DZFA-NEXT:    slli a0, a0, 39
+; RV64DZFA-NEXT:    addi a0, a0, 1
+; RV64DZFA-NEXT:    fmv.d.x fa0, a0
+; RV64DZFA-NEXT:    ret
   ret double 0x7ff8000000000001
 }
 
 ; Negative test. This is an snan with payload of 1.
 define double @loadfpimm12() {
-; CHECK-LABEL: loadfpimm12:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa0, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    ret
+; RV32IDZFA-LABEL: loadfpimm12:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32IDZFA-NEXT:    fld fa0, %lo(.LCPI11_0)(a0)
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: loadfpimm12:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    li a0, 2047
+; RV64DZFA-NEXT:    slli a0, a0, 52
+; RV64DZFA-NEXT:    addi a0, a0, 1
+; RV64DZFA-NEXT:    fmv.d.x fa0, a0
+; RV64DZFA-NEXT:    ret
   ret double 0x7ff0000000000001
 }
 
@@ -125,11 +155,18 @@ define double @loadfpimm13() {
 
 ; Negative test. This is 2^-1023, a denormal.
 define double @loadfpimm15() {
-; CHECK-LABEL: loadfpimm15:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa0, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    ret
+; RV32IDZFA-LABEL: loadfpimm15:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32IDZFA-NEXT:    fld fa0, %lo(.LCPI13_0)(a0)
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: loadfpimm15:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    li a0, 1
+; RV64DZFA-NEXT:    slli a0, a0, 51
+; RV64DZFA-NEXT:    fmv.d.x fa0, a0
+; RV64DZFA-NEXT:    ret
   ret double 0x0008000000000000
 }
 
@@ -144,11 +181,18 @@ define double @loadfpimm16() {
 ; Ensure fli isn't incorrectly used for negated versions of numbers in the fli
 ; table.
 define double @loadfpimm17() {
-; CHECK-LABEL: loadfpimm17:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI15_0)
-; CHECK-NEXT:    fld fa0, %lo(.LCPI15_0)(a0)
-; CHECK-NEXT:    ret
+; RV32IDZFA-LABEL: loadfpimm17:
+; RV32IDZFA:       # %bb.0:
+; RV32IDZFA-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32IDZFA-NEXT:    fld fa0, %lo(.LCPI15_0)(a0)
+; RV32IDZFA-NEXT:    ret
+;
+; RV64DZFA-LABEL: loadfpimm17:
+; RV64DZFA:       # %bb.0:
+; RV64DZFA-NEXT:    li a0, -1
+; RV64DZFA-NEXT:    slli a0, a0, 62
+; RV64DZFA-NEXT:    fmv.d.x fa0, a0
+; RV64DZFA-NEXT:    ret
   ret double -2.0
 }
 
diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index 235979b122215a6..e4beeb01e13ebf2 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -628,8 +628,9 @@ define i64 @fcvt_l_s_sat(float %a) nounwind {
 ; RV32IF-NEXT:  # %bb.1: # %start
 ; RV32IF-NEXT:    mv a2, a1
 ; RV32IF-NEXT:  .LBB12_2: # %start
-; RV32IF-NEXT:    lui a1, %hi(.LCPI12_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI12_0)(a1)
+; RV32IF-NEXT:    lui a1, 389120
+; RV32IF-NEXT:    addi a1, a1, -1
+; RV32IF-NEXT:    fmv.w.x fa5, a1
 ; RV32IF-NEXT:    flt.s a3, fa5, fs0
 ; RV32IF-NEXT:    beqz a3, .LBB12_4
 ; RV32IF-NEXT:  # %bb.3:
@@ -638,9 +639,9 @@ define i64 @fcvt_l_s_sat(float %a) nounwind {
 ; RV32IF-NEXT:    feq.s a1, fs0, fs0
 ; RV32IF-NEXT:    neg a4, a1
 ; RV32IF-NEXT:    and a1, a4, a2
+; RV32IF-NEXT:    neg a2, s0
+; RV32IF-NEXT:    and a0, a2, a0
 ; RV32IF-NEXT:    neg a2, a3
-; RV32IF-NEXT:    neg a3, s0
-; RV32IF-NEXT:    and a0, a3, a0
 ; RV32IF-NEXT:    or a0, a2, a0
 ; RV32IF-NEXT:    and a0, a4, a0
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -675,8 +676,8 @@ define i64 @fcvt_l_s_sat(float %a) nounwind {
 ; RV32IZFINX-NEXT:  # %bb.1: # %start
 ; RV32IZFINX-NEXT:    mv a2, a1
 ; RV32IZFINX-NEXT:  .LBB12_2: # %start
-; RV32IZFINX-NEXT:    lui a1, %hi(.LCPI12_0)
-; RV32IZFINX-NEXT:    lw a1, %lo(.LCPI12_0)(a1)
+; RV32IZFINX-NEXT:    lui a1, 389120
+; RV32IZFINX-NEXT:    addi a1, a1, -1
 ; RV32IZFINX-NEXT:    flt.s a3, a1, s0
 ; RV32IZFINX-NEXT:    beqz a3, .LBB12_4
 ; RV32IZFINX-NEXT:  # %bb.3:
@@ -869,9 +870,10 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
 ; RV32IF-NEXT:    fle.s a0, fa5, fa0
 ; RV32IF-NEXT:    neg s0, a0
 ; RV32IF-NEXT:    call __fixunssfdi@plt
-; RV32IF-NEXT:    lui a2, %hi(.LCPI14_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI14_0)(a2)
 ; RV32IF-NEXT:    and a0, s0, a0
+; RV32IF-NEXT:    lui a2, 391168
+; RV32IF-NEXT:    addi a2, a2, -1
+; RV32IF-NEXT:    fmv.w.x fa5, a2
 ; RV32IF-NEXT:    flt.s a2, fa5, fs0
 ; RV32IF-NEXT:    neg a2, a2
 ; RV32IF-NEXT:    or a0, a2, a0
@@ -903,9 +905,9 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
 ; RV32IZFINX-NEXT:    neg s1, a0
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixunssfdi@plt
-; RV32IZFINX-NEXT:    lui a2, %hi(.LCPI14_0)
-; RV32IZFINX-NEXT:    lw a2, %lo(.LCPI14_0)(a2)
 ; RV32IZFINX-NEXT:    and a0, s1, a0
+; RV32IZFINX-NEXT:    lui a2, 391168
+; RV32IZFINX-NEXT:    addi a2, a2, -1
 ; RV32IZFINX-NEXT:    flt.s a2, a2, s0
 ; RV32IZFINX-NEXT:    neg a2, a2
 ; RV32IZFINX-NEXT:    or a0, a2, a0
@@ -1411,12 +1413,13 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
 ; RV32IF:       # %bb.0: # %start
 ; RV32IF-NEXT:    feq.s a0, fa0, fa0
 ; RV32IF-NEXT:    neg a0, a0
-; RV32IF-NEXT:    lui a1, %hi(.LCPI24_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI24_0)(a1)
 ; RV32IF-NEXT:    lui a1, 815104
+; RV32IF-NEXT:    fmv.w.x fa5, a1
+; RV32IF-NEXT:    fmax.s fa5, fa0, fa5
+; RV32IF-NEXT:    lui a1, 290816
+; RV32IF-NEXT:    addi a1, a1, -512
 ; RV32IF-NEXT:    fmv.w.x fa4, a1
-; RV32IF-NEXT:    fmax.s fa4, fa0, fa4
-; RV32IF-NEXT:    fmin.s fa5, fa4, fa5
+; RV32IF-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32IF-NEXT:    fcvt.w.s a1, fa5, rtz
 ; RV32IF-NEXT:    and a0, a0, a1
 ; RV32IF-NEXT:    ret
@@ -1424,13 +1427,14 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
 ; RV64IF-LABEL: fcvt_w_s_sat_i16:
 ; RV64IF:       # %bb.0: # %start
 ; RV64IF-NEXT:    feq.s a0, fa0, fa0
-; RV64IF-NEXT:    lui a1, %hi(.LCPI24_0)
-; RV64IF-NEXT:    flw fa5, %lo(.LCPI24_0)(a1)
+; RV64IF-NEXT:    neg a0, a0
 ; RV64IF-NEXT:    lui a1, 815104
+; RV64IF-NEXT:    fmv.w.x fa5, a1
+; RV64IF-NEXT:    fmax.s fa5, fa0, fa5
+; RV64IF-NEXT:    lui a1, 290816
+; RV64IF-NEXT:    addi a1, a1, -512
 ; RV64IF-NEXT:    fmv.w.x fa4, a1
-; RV64IF-NEXT:    fmax.s fa4, fa0, fa4
-; RV64IF-NEXT:    neg a0, a0
-; RV64IF-NEXT:    fmin.s fa5, fa4, fa5
+; RV64IF-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64IF-NEXT:    fcvt.l.s a1, fa5, rtz
 ; RV64IF-NEXT:    and a0, a0, a1
 ; RV64IF-NEXT:    ret
@@ -1438,11 +1442,11 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
 ; RV32IZFINX-LABEL: fcvt_w_s_sat_i16:
 ; RV32IZFINX:       # %bb.0: # %start
 ; RV32IZFINX-NEXT:    feq.s a1, a0, a0
-; RV32IZFINX-NEXT:    lui a2, %hi(.LCPI24_0)
-; RV32IZFINX-NEXT:    lw a2, %lo(.LCPI24_0)(a2)
 ; RV32IZFINX-NEXT:    neg a1, a1
-; RV32IZFINX-NEXT:    lui a3, 815104
-; RV32IZFINX-NEXT:    fmax.s a0, a0, a3
+; RV32IZFINX-NEXT:    lui a2, 815104
+; RV32IZFINX-NEXT:    fmax.s a0, a0, a2
+; RV32IZFINX-NEXT:    lui a2, 290816
+; RV32IZFINX-NEXT:    addi a2, a2, -512
 ; RV32IZFINX-NEXT:    fmin.s a0, a0, a2
 ; RV32IZFINX-NEXT:    fcvt.w.s a0, a0, rtz
 ; RV32IZFINX-NEXT:    and a0, a1, a0
@@ -1450,15 +1454,15 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
 ;
 ; RV64IZFINX-LABEL: fcvt_w_s_sat_i16:
 ; RV64IZFINX:       # %bb.0: # %start
-; RV64IZFINX-NEXT:    lui a1, 815104
-; RV64IZFINX-NEXT:    lui a2, %hi(.LCPI24_0)
-; RV64IZFINX-NEXT:    lw a2, %lo(.LCPI24_0)(a2)
-; RV64IZFINX-NEXT:    fmax.s a1, a0, a1
-; RV64IZFINX-NEXT:    feq.s a0, a0, a0
-; RV64IZFINX-NEXT:    neg a0, a0
-; RV64IZFINX-NEXT:    fmin.s a1, a1, a2
-; RV64IZFINX-NEXT:    fcvt.l.s a1, a1, rtz
-; RV64IZFINX-NEXT:    and a0, a0, a1
+; RV64IZFINX-NEXT:    feq.s a1, a0, a0
+; RV64IZFINX-NEXT:    neg a1, a1
+; RV64IZFINX-NEXT:    lui a2, 815104
+; RV64IZFINX-NEXT:    fmax.s a0, a0, a2
+; RV64IZFINX-NEXT:    lui a2, 290816
+; RV64IZFINX-NEXT:    addiw a2, a2, -512
+; RV64IZFINX-NEXT:    fmin.s a0, a0, a2
+; RV64IZFINX-NEXT:    fcvt.l.s a0, a0, rtz
+; RV64IZFINX-NEXT:    and a0, a1, a0
 ; RV64IZFINX-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i16:
@@ -1595,38 +1599,40 @@ define zeroext i16 @fcvt_wu_s_i16(float %a) nounwind {
 define zeroext i16 @fcvt_wu_s_sat_i16(float %a) nounwind {
 ; RV32IF-LABEL: fcvt_wu_s_sat_i16:
 ; RV32IF:       # %bb.0: # %start
-; RV32IF-NEXT:    lui a0, %hi(.LCPI26_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI26_0)(a0)
-; RV32IF-NEXT:    fmv.w.x fa4, zero
-; RV32IF-NEXT:    fmax.s fa4, fa0, fa4
-; RV32IF-NEXT:    fmin.s fa5, fa4, fa5
+; RV32IF-NEXT:    fmv.w.x fa5, zero
+; RV32IF-NEXT:    fmax.s fa5, fa0, fa5
+; RV32IF-NEXT:    lui a0, 292864
+; RV32IF-NEXT:    addi a0, a0, -256
+; RV32IF-NEXT:    fmv.w.x fa4, a0
+; RV32IF-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32IF-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: fcvt_wu_s_sat_i16:
 ; RV64IF:       # %bb.0: # %start
-; RV64IF-NEXT:    lui a0, %hi(.LCPI26_0)
-; RV64IF-NEXT:    flw fa5, %lo(.LCPI26_0)(a0)
-; RV64IF-NEXT:    fmv.w.x fa4, zero
-; RV64IF-NEXT:    fmax.s fa4, fa0, fa4
-; RV64IF-NEXT:    fmin.s fa5, fa4, fa5
+; RV64IF-NEXT:    fmv.w.x fa5, zero
+; RV64IF-NEXT:    fmax.s fa5, fa0, fa5
+; RV64IF-NEXT:    lui a0, 292864
+; RV64IF-NEXT:    addi a0, a0, -256
+; RV64IF-NEXT:    fmv.w.x fa4, a0
+; RV64IF-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64IF-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; RV64IF-NEXT:    ret
 ;
 ; RV32IZFINX-LABEL: fcvt_wu_s_sat_i16:
 ; RV32IZFINX:       # %bb.0: # %start
-; RV32IZFINX-NEXT:    lui a1, %hi(.LCPI26_0)
-; RV32IZFINX-NEXT:    lw a1, %lo(.LCPI26_0)(a1)
 ; RV32IZFINX-NEXT:    fmax.s a0, a0, zero
+; RV32IZFINX-NEXT:    lui a1, 292864
+; RV32IZFINX-NEXT:    addi a1, a1, -256
 ; RV32IZFINX-NEXT:    fmin.s a0, a0, a1
 ; RV32IZFINX-NEXT:    fcvt.wu.s a0, a0, rtz
 ; RV32IZFINX-NEXT:    ret
 ;
 ; RV64IZFINX-LABEL: fcvt_wu_s_sat_i16:
 ; RV64IZFINX:       # %bb.0: # %start
-; RV64IZFINX-NEXT:    lui a1, %hi(.LCPI26_0)
-; RV64IZFINX-NEXT:    lw a1, %lo(.LCPI26_0)(a1)
 ; RV64IZFINX-NEXT:    fmax.s a0, a0, zero
+; RV64IZFINX-NEXT:    lui a1, 292864
+; RV64IZFINX-NEXT:    addiw a1, a1, -256
 ; RV64IZFINX-NEXT:    fmin.s a0, a0, a1
 ; RV64IZFINX-NEXT:    fcvt.lu.s a0, a0, rtz
 ; RV64IZFINX-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/float-imm.ll b/llvm/test/CodeGen/RISCV/float-imm.ll
index a25955e2ef349f3..4cbcca2937e3bf1 100644
--- a/llvm/test/CodeGen/RISCV/float-imm.ll
+++ b/llvm/test/CodeGen/RISCV/float-imm.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
-; RUN:   -target-abi=ilp32f | FileCheck %s
+; RUN:   -target-abi=ilp32f | FileCheck --check-prefixes=CHECK %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
-; RUN:   -target-abi=lp64f | FileCheck %s
+; RUN:   -target-abi=lp64f | FileCheck --check-prefixes=CHECK %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zfinx -verify-machineinstrs < %s \
 ; RUN:   -target-abi=ilp32 | FileCheck --check-prefixes=CHECKZFINX,RV32ZFINX %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zfinx -verify-machineinstrs < %s \
@@ -10,10 +10,12 @@
 
 ; TODO: constant pool shouldn't be necessary for RV64IF.
 define float @float_imm() nounwind {
+;
 ; CHECK-LABEL: float_imm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flw fa0, %lo(.LCPI0_0)(a0)
+; CHECK-NEXT:    lui a0, 263313
+; CHECK-NEXT:    addi a0, a0, -37
+; CHECK-NEXT:    fmv.w.x fa0, a0
 ; CHECK-NEXT:    ret
 ;
 ; RV32ZFINX-LABEL: float_imm:
@@ -24,8 +26,8 @@ define float @float_imm() nounwind {
 ;
 ; RV64ZFINX-LABEL: float_imm:
 ; RV64ZFINX:       # %bb.0:
-; RV64ZFINX-NEXT:    lui a0, %hi(.LCPI0_0)
-; RV64ZFINX-NEXT:    lw a0, %lo(.LCPI0_0)(a0)
+; RV64ZFINX-NEXT:    lui a0, 263313
+; RV64ZFINX-NEXT:    addiw a0, a0, -37
 ; RV64ZFINX-NEXT:    ret
   ret float 3.14159274101257324218750
 }
diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
index 61337216c7fb5b1..384c4d34ce5d0bf 100644
--- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
@@ -60,8 +60,9 @@ define i64 @test_floor_si64(float %x) nounwind {
 ; RV32IF-NEXT:  # %bb.3:
 ; RV32IF-NEXT:    mv a2, a1
 ; RV32IF-NEXT:  .LBB1_4:
-; RV32IF-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI1_0)(a1)
+; RV32IF-NEXT:    lui a1, 389120
+; RV32IF-NEXT:    addi a1, a1, -1
+; RV32IF-NEXT:    fmv.w.x fa5, a1
 ; RV32IF-NEXT:    flt.s a3, fa5, fs0
 ; RV32IF-NEXT:    beqz a3, .LBB1_6
 ; RV32IF-NEXT:  # %bb.5:
@@ -112,9 +113,9 @@ define i64 @test_floor_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    neg s2, s1
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixsfdi@plt
-; RV32IZFINX-NEXT:    lui a2, %hi(.LCPI1_0)
-; RV32IZFINX-NEXT:    lw a2, %lo(.LCPI1_0)(a2)
 ; RV32IZFINX-NEXT:    and a0, s2, a0
+; RV32IZFINX-NEXT:    lui a2, 389120
+; RV32IZFINX-NEXT:    addi a2, a2, -1
 ; RV32IZFINX-NEXT:    flt.s a4, a2, s0
 ; RV32IZFINX-NEXT:    neg a2, a4
 ; RV32IZFINX-NEXT:    or a0, a2, a0
@@ -198,9 +199,10 @@ define i64 @test_floor_ui64(float %x) nounwind {
 ; RV32IF-NEXT:    neg s0, a0
 ; RV32IF-NEXT:    fmv.s fa0, fs0
 ; RV32IF-NEXT:    call __fixunssfdi@plt
-; RV32IF-NEXT:    lui a2, %hi(.LCPI3_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI3_0)(a2)
 ; RV32IF-NEXT:    and a0, s0, a0
+; RV32IF-NEXT:    lui a2, 391168
+; RV32IF-NEXT:    addi a2, a2, -1
+; RV32IF-NEXT:    fmv.w.x fa5, a2
 ; RV32IF-NEXT:    flt.s a2, fa5, fs0
 ; RV32IF-NEXT:    neg a2, a2
 ; RV32IF-NEXT:    or a0, a2, a0
@@ -241,9 +243,9 @@ define i64 @test_floor_ui64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    neg s1, a0
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixunssfdi@plt
-; RV32IZFINX-NEXT:    lui a2, %hi(.LCPI3_0)
-; RV32IZFINX-NEXT:    lw a2, %lo(.LCPI3_0)(a2)
 ; RV32IZFINX-NEXT:    and a0, s1, a0
+; RV32IZFINX-NEXT:    lui a2, 391168
+; RV32IZFINX-NEXT:    addi a2, a2, -1
 ; RV32IZFINX-NEXT:    flt.s a2, a2, s0
 ; RV32IZFINX-NEXT:    neg a2, a2
 ; RV32IZFINX-NEXT:    or a0, a2, a0
@@ -320,8 +322,9 @@ define i64 @test_ceil_si64(float %x) nounwind {
 ; RV32IF-NEXT:  # %bb.3:
 ; RV32IF-NEXT:    mv a2, a1
 ; RV32IF-NEXT:  .LBB5_4:
-; RV32IF-NEXT:    lui a1, %hi(.LCPI5_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI5_0)(a1)
+; RV32IF-NEXT:    lui a1, 389120
+; RV32IF-NEXT:    addi a1, a1, -1
+; RV32IF-NEXT:    fmv.w.x fa5, a1
 ; RV32IF-NEXT:    flt.s a3, fa5, fs0
 ; RV32IF-NEXT:    beqz a3, .LBB5_6
 ; RV32IF-NEXT:  # %bb.5:
@@ -372,9 +375,9 @@ define i64 @test_ceil_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    neg s2, s1
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixsfdi@plt
-; RV32IZFINX-NEXT:    lui a2, %hi(.LCPI5_0)
-; RV32IZFINX-NEXT:    lw a2, %lo(.LCPI5_0)(a2)
 ; RV32IZFINX-NEXT:    and a0, s2, a0
+; RV32IZFINX-NEXT:    lui a2, 389120
+; RV32IZFINX-NEXT:    addi a2, a2, -1
 ; RV32IZFINX-NEXT:    flt.s a4, a2, s0
 ; RV32IZFINX-NEXT:    neg a2, a4
 ; RV32IZFINX-NEXT:    or a0, a2, a0
@@ -458,9 +461,10 @@ define i64 @test_ceil_ui64(float %x) nounwind {
 ; RV32IF-NEXT:    neg s0, a0
 ; RV32IF-NEXT:    fmv.s fa0, fs0
 ; RV32IF-NEXT:    call __fixunssfdi@plt
-; RV32IF-NEXT:    lui a2, %hi(.LCPI7_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI7_0)(a2)
 ; RV32IF-NEXT:    and a0, s0, a0
+; RV32IF-NEXT:    lui a2, 391168
+; RV32IF-NEXT:    addi a2, a2, -1
+; RV32IF-NEXT:    fmv.w.x fa5, a2
 ; RV32IF-NEXT:    flt.s a2, fa5, fs0
 ; RV32IF-NEXT:    neg a2, a2
 ; RV32IF-NEXT:    or a0, a2, a0
@@ -501,9 +505,9 @@ define i64 @test_ceil_ui64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    neg s1, a0
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixunssfdi@plt
-; RV32IZFINX-NEXT:    lui a2, %hi(.LCPI7_0)
-; RV32IZFINX-NEXT:    lw a2, %lo(.LCPI7_0)(a2)
 ; RV32IZFINX-NEXT:    and a0, s1, a0
+; RV32IZFINX-NEXT:    lui a2, 391168
+; RV32IZFINX-NEXT:    addi a2, a2, -1
 ; RV32IZFINX-NEXT:    flt.s a2, a2, s0
 ; RV32IZFINX-NEXT:    neg a2, a2
 ; RV32IZFINX-NEXT:    or a0, a2, a0
@@ -580,8 +584,9 @@ define i64 @test_trunc_si64(float %x) nounwind {
 ; RV32IF-NEXT:  # %bb.3:
 ; RV32IF-NEXT:    mv a2, a1
 ; RV32IF-NEXT:  .LBB9_4:
-; RV32IF-NEXT:    lui a1, %hi(.LCPI9_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI9_0)(a1)
+; RV32IF-NEXT:    lui a1, 389120
+; RV32IF-NEXT:    addi a1, a1, -1
+; RV32IF-NEXT:    fmv.w.x fa5, a1
 ; RV32IF-NEXT:    flt.s a3, fa5, fs0
 ; RV32IF-NEXT:    beqz a3, .LBB9_6
 ; RV32IF-NEXT:  # %bb.5:
@@ -632,9 +637,9 @@ define i64 @test_trunc_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    neg s2, s1
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixsfdi@plt
-; RV32IZFINX-NEXT:    lui a2, %hi(.LCPI9_0)
-; RV32IZFINX-NEXT:    lw a2, %lo(.LCPI9_0)(a2)
 ; RV32IZFINX-NEXT:    and a0, s2, a0
+; RV32IZFINX-NEXT:    lui a2, 389120
+; RV32IZFINX-NEXT:    addi a2, a2, -1
 ; RV32IZFINX-NEXT:    flt.s a4, a2, s0
 ; RV32IZFINX-NEXT:    neg a2, a4
 ; RV32IZFINX-NEXT:    or a0, a2, a0
@@ -718,9 +723,10 @@ define i64 @test_trunc_ui64(float %x) nounwind {
 ; RV32IF-NEXT:    neg s0, a0
 ; RV32IF-NEXT:    fmv.s fa0, fs0
 ; RV32IF-NEXT:    call __fixunssfdi@plt
-; RV32IF-NEXT:    lui a2, %hi(.LCPI11_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI11_0)(a2)
 ; RV32IF-NEXT:    and a0, s0, a0
+; RV32IF-NEXT:    lui a2, 391168
+; RV32IF-NEXT:    addi a2, a2, -1
+; RV32IF-NEXT:    fmv.w.x fa5, a2
 ; RV32IF-NEXT:    flt.s a2, fa5, fs0
 ; RV32IF-NEXT:    neg a2, a2
 ; RV32IF-NEXT:    or a0, a2, a0
@@ -761,9 +767,9 @@ define i64 @test_trunc_ui64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    neg s1, a0
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixunssfdi@plt
-; RV32IZFINX-NEXT:    lui a2, %hi(.LCPI11_0)
-; RV32IZFINX-NEXT:    lw a2, %lo(.LCPI11_0)(a2)
 ; RV32IZFINX-NEXT:    and a0, s1, a0
+; RV32IZFINX-NEXT:    lui a2, 391168
+; RV32IZFINX-NEXT:    addi a2, a2, -1
 ; RV32IZFINX-NEXT:    flt.s a2, a2, s0
 ; RV32IZFINX-NEXT:    neg a2, a2
 ; RV32IZFINX-NEXT:    or a0, a2, a0
@@ -840,8 +846,9 @@ define i64 @test_round_si64(float %x) nounwind {
 ; RV32IF-NEXT:  # %bb.3:
 ; RV32IF-NEXT:    mv a2, a1
 ; RV32IF-NEXT:  .LBB13_4:
-; RV32IF-NEXT:    lui a1, %hi(.LCPI13_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI13_0)(a1)
+; RV32IF-NEXT:    lui a1, 389120
+; RV32IF-NEXT:    addi a1, a1, -1
+; RV32IF-NEXT:    fmv.w.x fa5, a1
 ; RV32IF-NEXT:    flt.s a3, fa5, fs0
 ; RV32IF-NEXT:    beqz a3, .LBB13_6
 ; RV32IF-NEXT:  # %bb.5:
@@ -892,9 +899,9 @@ define i64 @test_round_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    neg s2, s1
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixsfdi@plt
-; RV32IZFINX-NEXT:    lui a2, %hi(.LCPI13_0)
-; RV32IZFINX-NEXT:    lw a2, %lo(.LCPI13_0)(a2)
 ; RV32IZFINX-NEXT:    and a0, s2, a0
+; RV32IZFINX-NEXT:    lui a2, 389120
+; RV32IZFINX-NEXT:    addi a2, a2, -1
 ; RV32IZFINX-NEXT:    flt.s a4, a2, s0
 ; RV32IZFINX-NEXT:    neg a2, a4
 ; RV32IZFINX-NEXT:    or a0, a2, a0
@@ -978,9 +985,10 @@ define i64 @test_round_ui64(float %x) nounwind {
 ; RV32IF-NEXT:    neg s0, a0
 ; RV32IF-NEXT:    fmv.s fa0, fs0
 ; RV32IF-NEXT:    call __fixunssfdi@plt
-; RV32IF-NEXT:    lui a2, %hi(.LCPI15_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI15_0)(a2)
 ; RV32IF-NEXT:    and a0, s0, a0
+; RV32IF-NEXT:    lui a2, 391168
+; RV32IF-NEXT:    addi a2, a2, -1
+; RV32IF-NEXT:    fmv.w.x fa5, a2
 ; RV32IF-NEXT:    flt.s a2, fa5, fs0
 ; RV32IF-NEXT:    neg a2, a2
 ; RV32IF-NEXT:    or a0, a2, a0
@@ -1021,9 +1029,9 @@ define i64 @test_round_ui64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    neg s1, a0
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixunssfdi@plt
-; RV32IZFINX-NEXT:    lui a2, %hi(.LCPI15_0)
-; RV32IZFINX-NEXT:    lw a2, %lo(.LCPI15_0)(a2)
 ; RV32IZFINX-NEXT:    and a0, s1, a0
+; RV32IZFINX-NEXT:    lui a2, 391168
+; RV32IZFINX-NEXT:    addi a2, a2, -1
 ; RV32IZFINX-NEXT:    flt.s a2, a2, s0
 ; RV32IZFINX-NEXT:    neg a2, a2
 ; RV32IZFINX-NEXT:    or a0, a2, a0
@@ -1100,8 +1108,9 @@ define i64 @test_roundeven_si64(float %x) nounwind {
 ; RV32IF-NEXT:  # %bb.3:
 ; RV32IF-NEXT:    mv a2, a1
 ; RV32IF-NEXT:  .LBB17_4:
-; RV32IF-NEXT:    lui a1, %hi(.LCPI17_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI17_0)(a1)
+; RV32IF-NEXT:    lui a1, 389120
+; RV32IF-NEXT:    addi a1, a1, -1
+; RV32IF-NEXT:    fmv.w.x fa5, a1
 ; RV32IF-NEXT:    flt.s a3, fa5, fs0
 ; RV32IF-NEXT:    beqz a3, .LBB17_6
 ; RV32IF-NEXT:  # %bb.5:
@@ -1152,9 +1161,9 @@ define i64 @test_roundeven_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    neg s2, s1
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixsfdi@plt
-; RV32IZFINX-NEXT:    lui a2, %hi(.LCPI17_0)
-; RV32IZFINX-NEXT:    lw a2, %lo(.LCPI17_0)(a2)
 ; RV32IZFINX-NEXT:    and a0, s2, a0
+; RV32IZFINX-NEXT:    lui a2, 389120
+; RV32IZFINX-NEXT:    addi a2, a2, -1
 ; RV32IZFINX-NEXT:    flt.s a4, a2, s0
 ; RV32IZFINX-NEXT:    neg a2, a4
 ; RV32IZFINX-NEXT:    or a0, a2, a0
@@ -1238,9 +1247,10 @@ define i64 @test_roundeven_ui64(float %x) nounwind {
 ; RV32IF-NEXT:    neg s0, a0
 ; RV32IF-NEXT:    fmv.s fa0, fs0
 ; RV32IF-NEXT:    call __fixunssfdi@plt
-; RV32IF-NEXT:    lui a2, %hi(.LCPI19_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI19_0)(a2)
 ; RV32IF-NEXT:    and a0, s0, a0
+; RV32IF-NEXT:    lui a2, 391168
+; RV32IF-NEXT:    addi a2, a2, -1
+; RV32IF-NEXT:    fmv.w.x fa5, a2
 ; RV32IF-NEXT:    flt.s a2, fa5, fs0
 ; RV32IF-NEXT:    neg a2, a2
 ; RV32IF-NEXT:    or a0, a2, a0
@@ -1281,9 +1291,9 @@ define i64 @test_roundeven_ui64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    neg s1, a0
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixunssfdi@plt
-; RV32IZFINX-NEXT:    lui a2, %hi(.LCPI19_0)
-; RV32IZFINX-NEXT:    lw a2, %lo(.LCPI19_0)(a2)
 ; RV32IZFINX-NEXT:    and a0, s1, a0
+; RV32IZFINX-NEXT:    lui a2, 391168
+; RV32IZFINX-NEXT:    addi a2, a2, -1
 ; RV32IZFINX-NEXT:    flt.s a2, a2, s0
 ; RV32IZFINX-NEXT:    neg a2, a2
 ; RV32IZFINX-NEXT:    or a0, a2, a0
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index 2d3f40e15fe4324..5759acc139a8066 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -196,11 +196,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV32IZFH-NEXT:    fcvt.s.h fa5, fa0
 ; RV32IZFH-NEXT:    feq.s a0, fa5, fa5
 ; RV32IZFH-NEXT:    neg a0, a0
-; RV32IZFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV32IZFH-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
 ; RV32IZFH-NEXT:    lui a1, 815104
-; RV32IZFH-NEXT:    fmv.w.x fa3, a1
-; RV32IZFH-NEXT:    fmax.s fa5, fa5, fa3
+; RV32IZFH-NEXT:    fmv.w.x fa4, a1
+; RV32IZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV32IZFH-NEXT:    lui a1, 290816
+; RV32IZFH-NEXT:    addi a1, a1, -512
+; RV32IZFH-NEXT:    fmv.w.x fa4, a1
 ; RV32IZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32IZFH-NEXT:    fcvt.w.s a1, fa5, rtz
 ; RV32IZFH-NEXT:    and a0, a0, a1
@@ -210,12 +211,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV64IZFH:       # %bb.0: # %start
 ; RV64IZFH-NEXT:    fcvt.s.h fa5, fa0
 ; RV64IZFH-NEXT:    feq.s a0, fa5, fa5
-; RV64IZFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV64IZFH-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
-; RV64IZFH-NEXT:    lui a1, 815104
-; RV64IZFH-NEXT:    fmv.w.x fa3, a1
-; RV64IZFH-NEXT:    fmax.s fa5, fa5, fa3
 ; RV64IZFH-NEXT:    neg a0, a0
+; RV64IZFH-NEXT:    lui a1, 815104
+; RV64IZFH-NEXT:    fmv.w.x fa4, a1
+; RV64IZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV64IZFH-NEXT:    lui a1, 290816
+; RV64IZFH-NEXT:    addi a1, a1, -512
+; RV64IZFH-NEXT:    fmv.w.x fa4, a1
 ; RV64IZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64IZFH-NEXT:    fcvt.l.s a1, fa5, rtz
 ; RV64IZFH-NEXT:    and a0, a0, a1
@@ -226,11 +228,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV32IDZFH-NEXT:    fcvt.s.h fa5, fa0
 ; RV32IDZFH-NEXT:    feq.s a0, fa5, fa5
 ; RV32IDZFH-NEXT:    neg a0, a0
-; RV32IDZFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV32IDZFH-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
 ; RV32IDZFH-NEXT:    lui a1, 815104
-; RV32IDZFH-NEXT:    fmv.w.x fa3, a1
-; RV32IDZFH-NEXT:    fmax.s fa5, fa5, fa3
+; RV32IDZFH-NEXT:    fmv.w.x fa4, a1
+; RV32IDZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV32IDZFH-NEXT:    lui a1, 290816
+; RV32IDZFH-NEXT:    addi a1, a1, -512
+; RV32IDZFH-NEXT:    fmv.w.x fa4, a1
 ; RV32IDZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32IDZFH-NEXT:    fcvt.w.s a1, fa5, rtz
 ; RV32IDZFH-NEXT:    and a0, a0, a1
@@ -240,12 +243,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV64IDZFH:       # %bb.0: # %start
 ; RV64IDZFH-NEXT:    fcvt.s.h fa5, fa0
 ; RV64IDZFH-NEXT:    feq.s a0, fa5, fa5
-; RV64IDZFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV64IDZFH-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
-; RV64IDZFH-NEXT:    lui a1, 815104
-; RV64IDZFH-NEXT:    fmv.w.x fa3, a1
-; RV64IDZFH-NEXT:    fmax.s fa5, fa5, fa3
 ; RV64IDZFH-NEXT:    neg a0, a0
+; RV64IDZFH-NEXT:    lui a1, 815104
+; RV64IDZFH-NEXT:    fmv.w.x fa4, a1
+; RV64IDZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV64IDZFH-NEXT:    lui a1, 290816
+; RV64IDZFH-NEXT:    addi a1, a1, -512
+; RV64IDZFH-NEXT:    fmv.w.x fa4, a1
 ; RV64IDZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64IDZFH-NEXT:    fcvt.l.s a1, fa5, rtz
 ; RV64IDZFH-NEXT:    and a0, a0, a1
@@ -255,11 +259,11 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV32IZHINX:       # %bb.0: # %start
 ; RV32IZHINX-NEXT:    fcvt.s.h a0, a0
 ; RV32IZHINX-NEXT:    feq.s a1, a0, a0
-; RV32IZHINX-NEXT:    lui a2, %hi(.LCPI1_0)
-; RV32IZHINX-NEXT:    lw a2, %lo(.LCPI1_0)(a2)
 ; RV32IZHINX-NEXT:    neg a1, a1
-; RV32IZHINX-NEXT:    lui a3, 815104
-; RV32IZHINX-NEXT:    fmax.s a0, a0, a3
+; RV32IZHINX-NEXT:    lui a2, 815104
+; RV32IZHINX-NEXT:    fmax.s a0, a0, a2
+; RV32IZHINX-NEXT:    lui a2, 290816
+; RV32IZHINX-NEXT:    addi a2, a2, -512
 ; RV32IZHINX-NEXT:    fmin.s a0, a0, a2
 ; RV32IZHINX-NEXT:    fcvt.w.s a0, a0, rtz
 ; RV32IZHINX-NEXT:    and a0, a1, a0
@@ -268,26 +272,26 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV64IZHINX-LABEL: fcvt_si_h_sat:
 ; RV64IZHINX:       # %bb.0: # %start
 ; RV64IZHINX-NEXT:    fcvt.s.h a0, a0
-; RV64IZHINX-NEXT:    lui a1, 815104
-; RV64IZHINX-NEXT:    lui a2, %hi(.LCPI1_0)
-; RV64IZHINX-NEXT:    lw a2, %lo(.LCPI1_0)(a2)
-; RV64IZHINX-NEXT:    fmax.s a1, a0, a1
-; RV64IZHINX-NEXT:    feq.s a0, a0, a0
-; RV64IZHINX-NEXT:    neg a0, a0
-; RV64IZHINX-NEXT:    fmin.s a1, a1, a2
-; RV64IZHINX-NEXT:    fcvt.l.s a1, a1, rtz
-; RV64IZHINX-NEXT:    and a0, a0, a1
+; RV64IZHINX-NEXT:    feq.s a1, a0, a0
+; RV64IZHINX-NEXT:    neg a1, a1
+; RV64IZHINX-NEXT:    lui a2, 815104
+; RV64IZHINX-NEXT:    fmax.s a0, a0, a2
+; RV64IZHINX-NEXT:    lui a2, 290816
+; RV64IZHINX-NEXT:    addiw a2, a2, -512
+; RV64IZHINX-NEXT:    fmin.s a0, a0, a2
+; RV64IZHINX-NEXT:    fcvt.l.s a0, a0, rtz
+; RV64IZHINX-NEXT:    and a0, a1, a0
 ; RV64IZHINX-NEXT:    ret
 ;
 ; RV32IZDINXZHINX-LABEL: fcvt_si_h_sat:
 ; RV32IZDINXZHINX:       # %bb.0: # %start
 ; RV32IZDINXZHINX-NEXT:    fcvt.s.h a0, a0
 ; RV32IZDINXZHINX-NEXT:    feq.s a1, a0, a0
-; RV32IZDINXZHINX-NEXT:    lui a2, %hi(.LCPI1_0)
-; RV32IZDINXZHINX-NEXT:    lw a2, %lo(.LCPI1_0)(a2)
 ; RV32IZDINXZHINX-NEXT:    neg a1, a1
-; RV32IZDINXZHINX-NEXT:    lui a3, 815104
-; RV32IZDINXZHINX-NEXT:    fmax.s a0, a0, a3
+; RV32IZDINXZHINX-NEXT:    lui a2, 815104
+; RV32IZDINXZHINX-NEXT:    fmax.s a0, a0, a2
+; RV32IZDINXZHINX-NEXT:    lui a2, 290816
+; RV32IZDINXZHINX-NEXT:    addi a2, a2, -512
 ; RV32IZDINXZHINX-NEXT:    fmin.s a0, a0, a2
 ; RV32IZDINXZHINX-NEXT:    fcvt.w.s a0, a0, rtz
 ; RV32IZDINXZHINX-NEXT:    and a0, a1, a0
@@ -296,15 +300,15 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV64IZDINXZHINX-LABEL: fcvt_si_h_sat:
 ; RV64IZDINXZHINX:       # %bb.0: # %start
 ; RV64IZDINXZHINX-NEXT:    fcvt.s.h a0, a0
-; RV64IZDINXZHINX-NEXT:    lui a1, 815104
-; RV64IZDINXZHINX-NEXT:    lui a2, %hi(.LCPI1_0)
-; RV64IZDINXZHINX-NEXT:    lw a2, %lo(.LCPI1_0)(a2)
-; RV64IZDINXZHINX-NEXT:    fmax.s a1, a0, a1
-; RV64IZDINXZHINX-NEXT:    feq.s a0, a0, a0
-; RV64IZDINXZHINX-NEXT:    neg a0, a0
-; RV64IZDINXZHINX-NEXT:    fmin.s a1, a1, a2
-; RV64IZDINXZHINX-NEXT:    fcvt.l.s a1, a1, rtz
-; RV64IZDINXZHINX-NEXT:    and a0, a0, a1
+; RV64IZDINXZHINX-NEXT:    feq.s a1, a0, a0
+; RV64IZDINXZHINX-NEXT:    neg a1, a1
+; RV64IZDINXZHINX-NEXT:    lui a2, 815104
+; RV64IZDINXZHINX-NEXT:    fmax.s a0, a0, a2
+; RV64IZDINXZHINX-NEXT:    lui a2, 290816
+; RV64IZDINXZHINX-NEXT:    addiw a2, a2, -512
+; RV64IZDINXZHINX-NEXT:    fmin.s a0, a0, a2
+; RV64IZDINXZHINX-NEXT:    fcvt.l.s a0, a0, rtz
+; RV64IZDINXZHINX-NEXT:    and a0, a1, a0
 ; RV64IZDINXZHINX-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_si_h_sat:
@@ -401,11 +405,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a0
 ; RV32ID-ILP32-NEXT:    feq.s a0, fa5, fa5
 ; RV32ID-ILP32-NEXT:    neg a0, a0
-; RV32ID-ILP32-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV32ID-ILP32-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
 ; RV32ID-ILP32-NEXT:    lui a1, 815104
-; RV32ID-ILP32-NEXT:    fmv.w.x fa3, a1
-; RV32ID-ILP32-NEXT:    fmax.s fa5, fa5, fa3
+; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a1
+; RV32ID-ILP32-NEXT:    fmax.s fa5, fa5, fa4
+; RV32ID-ILP32-NEXT:    lui a1, 290816
+; RV32ID-ILP32-NEXT:    addi a1, a1, -512
+; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a1
 ; RV32ID-ILP32-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32ID-ILP32-NEXT:    fcvt.w.s a1, fa5, rtz
 ; RV32ID-ILP32-NEXT:    and a0, a0, a1
@@ -420,12 +425,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV64ID-LP64-NEXT:    call __extendhfsf2@plt
 ; RV64ID-LP64-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-LP64-NEXT:    feq.s a0, fa5, fa5
-; RV64ID-LP64-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV64ID-LP64-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
-; RV64ID-LP64-NEXT:    lui a1, 815104
-; RV64ID-LP64-NEXT:    fmv.w.x fa3, a1
-; RV64ID-LP64-NEXT:    fmax.s fa5, fa5, fa3
 ; RV64ID-LP64-NEXT:    neg a0, a0
+; RV64ID-LP64-NEXT:    lui a1, 815104
+; RV64ID-LP64-NEXT:    fmv.w.x fa4, a1
+; RV64ID-LP64-NEXT:    fmax.s fa5, fa5, fa4
+; RV64ID-LP64-NEXT:    lui a1, 290816
+; RV64ID-LP64-NEXT:    addi a1, a1, -512
+; RV64ID-LP64-NEXT:    fmv.w.x fa4, a1
 ; RV64ID-LP64-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64ID-LP64-NEXT:    fcvt.l.s a1, fa5, rtz
 ; RV64ID-LP64-NEXT:    and a0, a0, a1
@@ -440,12 +446,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV32ID-NEXT:    call __extendhfsf2@plt
 ; RV32ID-NEXT:    feq.s a0, fa0, fa0
 ; RV32ID-NEXT:    neg a0, a0
-; RV32ID-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV32ID-NEXT:    flw fa5, %lo(.LCPI1_0)(a1)
 ; RV32ID-NEXT:    lui a1, 815104
+; RV32ID-NEXT:    fmv.w.x fa5, a1
+; RV32ID-NEXT:    fmax.s fa5, fa0, fa5
+; RV32ID-NEXT:    lui a1, 290816
+; RV32ID-NEXT:    addi a1, a1, -512
 ; RV32ID-NEXT:    fmv.w.x fa4, a1
-; RV32ID-NEXT:    fmax.s fa4, fa0, fa4
-; RV32ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV32ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32ID-NEXT:    fcvt.w.s a1, fa5, rtz
 ; RV32ID-NEXT:    and a0, a0, a1
 ; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -458,13 +465,14 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64ID-NEXT:    call __extendhfsf2@plt
 ; RV64ID-NEXT:    feq.s a0, fa0, fa0
-; RV64ID-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV64ID-NEXT:    flw fa5, %lo(.LCPI1_0)(a1)
+; RV64ID-NEXT:    neg a0, a0
 ; RV64ID-NEXT:    lui a1, 815104
+; RV64ID-NEXT:    fmv.w.x fa5, a1
+; RV64ID-NEXT:    fmax.s fa5, fa0, fa5
+; RV64ID-NEXT:    lui a1, 290816
+; RV64ID-NEXT:    addi a1, a1, -512
 ; RV64ID-NEXT:    fmv.w.x fa4, a1
-; RV64ID-NEXT:    fmax.s fa4, fa0, fa4
-; RV64ID-NEXT:    neg a0, a0
-; RV64ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV64ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64ID-NEXT:    fcvt.l.s a1, fa5, rtz
 ; RV64ID-NEXT:    and a0, a0, a1
 ; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -476,11 +484,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; CHECK32-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; CHECK32-IZFHMIN-NEXT:    feq.s a0, fa5, fa5
 ; CHECK32-IZFHMIN-NEXT:    neg a0, a0
-; CHECK32-IZFHMIN-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK32-IZFHMIN-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
 ; CHECK32-IZFHMIN-NEXT:    lui a1, 815104
-; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa3, a1
-; CHECK32-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa3
+; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK32-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK32-IZFHMIN-NEXT:    lui a1, 290816
+; CHECK32-IZFHMIN-NEXT:    addi a1, a1, -512
+; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa4, a1
 ; CHECK32-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK32-IZFHMIN-NEXT:    fcvt.w.s a1, fa5, rtz
 ; CHECK32-IZFHMIN-NEXT:    and a0, a0, a1
@@ -490,12 +499,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; CHECK64-IZFHMIN:       # %bb.0: # %start
 ; CHECK64-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; CHECK64-IZFHMIN-NEXT:    feq.s a0, fa5, fa5
-; CHECK64-IZFHMIN-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK64-IZFHMIN-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
-; CHECK64-IZFHMIN-NEXT:    lui a1, 815104
-; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa3, a1
-; CHECK64-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa3
 ; CHECK64-IZFHMIN-NEXT:    neg a0, a0
+; CHECK64-IZFHMIN-NEXT:    lui a1, 815104
+; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK64-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK64-IZFHMIN-NEXT:    lui a1, 290816
+; CHECK64-IZFHMIN-NEXT:    addi a1, a1, -512
+; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa4, a1
 ; CHECK64-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK64-IZFHMIN-NEXT:    fcvt.l.s a1, fa5, rtz
 ; CHECK64-IZFHMIN-NEXT:    and a0, a0, a1
@@ -505,11 +515,11 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; CHECK32-IZHINXMIN:       # %bb.0: # %start
 ; CHECK32-IZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; CHECK32-IZHINXMIN-NEXT:    feq.s a1, a0, a0
-; CHECK32-IZHINXMIN-NEXT:    lui a2, %hi(.LCPI1_0)
-; CHECK32-IZHINXMIN-NEXT:    lw a2, %lo(.LCPI1_0)(a2)
 ; CHECK32-IZHINXMIN-NEXT:    neg a1, a1
-; CHECK32-IZHINXMIN-NEXT:    lui a3, 815104
-; CHECK32-IZHINXMIN-NEXT:    fmax.s a0, a0, a3
+; CHECK32-IZHINXMIN-NEXT:    lui a2, 815104
+; CHECK32-IZHINXMIN-NEXT:    fmax.s a0, a0, a2
+; CHECK32-IZHINXMIN-NEXT:    lui a2, 290816
+; CHECK32-IZHINXMIN-NEXT:    addi a2, a2, -512
 ; CHECK32-IZHINXMIN-NEXT:    fmin.s a0, a0, a2
 ; CHECK32-IZHINXMIN-NEXT:    fcvt.w.s a0, a0, rtz
 ; CHECK32-IZHINXMIN-NEXT:    and a0, a1, a0
@@ -518,26 +528,26 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; CHECK64-IZHINXMIN-LABEL: fcvt_si_h_sat:
 ; CHECK64-IZHINXMIN:       # %bb.0: # %start
 ; CHECK64-IZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECK64-IZHINXMIN-NEXT:    lui a1, 815104
-; CHECK64-IZHINXMIN-NEXT:    lui a2, %hi(.LCPI1_0)
-; CHECK64-IZHINXMIN-NEXT:    lw a2, %lo(.LCPI1_0)(a2)
-; CHECK64-IZHINXMIN-NEXT:    fmax.s a1, a0, a1
-; CHECK64-IZHINXMIN-NEXT:    feq.s a0, a0, a0
-; CHECK64-IZHINXMIN-NEXT:    neg a0, a0
-; CHECK64-IZHINXMIN-NEXT:    fmin.s a1, a1, a2
-; CHECK64-IZHINXMIN-NEXT:    fcvt.l.s a1, a1, rtz
-; CHECK64-IZHINXMIN-NEXT:    and a0, a0, a1
+; CHECK64-IZHINXMIN-NEXT:    feq.s a1, a0, a0
+; CHECK64-IZHINXMIN-NEXT:    neg a1, a1
+; CHECK64-IZHINXMIN-NEXT:    lui a2, 815104
+; CHECK64-IZHINXMIN-NEXT:    fmax.s a0, a0, a2
+; CHECK64-IZHINXMIN-NEXT:    lui a2, 290816
+; CHECK64-IZHINXMIN-NEXT:    addiw a2, a2, -512
+; CHECK64-IZHINXMIN-NEXT:    fmin.s a0, a0, a2
+; CHECK64-IZHINXMIN-NEXT:    fcvt.l.s a0, a0, rtz
+; CHECK64-IZHINXMIN-NEXT:    and a0, a1, a0
 ; CHECK64-IZHINXMIN-NEXT:    ret
 ;
 ; CHECK32-IZDINXZHINXMIN-LABEL: fcvt_si_h_sat:
 ; CHECK32-IZDINXZHINXMIN:       # %bb.0: # %start
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; CHECK32-IZDINXZHINXMIN-NEXT:    feq.s a1, a0, a0
-; CHECK32-IZDINXZHINXMIN-NEXT:    lui a2, %hi(.LCPI1_0)
-; CHECK32-IZDINXZHINXMIN-NEXT:    lw a2, %lo(.LCPI1_0)(a2)
 ; CHECK32-IZDINXZHINXMIN-NEXT:    neg a1, a1
-; CHECK32-IZDINXZHINXMIN-NEXT:    lui a3, 815104
-; CHECK32-IZDINXZHINXMIN-NEXT:    fmax.s a0, a0, a3
+; CHECK32-IZDINXZHINXMIN-NEXT:    lui a2, 815104
+; CHECK32-IZDINXZHINXMIN-NEXT:    fmax.s a0, a0, a2
+; CHECK32-IZDINXZHINXMIN-NEXT:    lui a2, 290816
+; CHECK32-IZDINXZHINXMIN-NEXT:    addi a2, a2, -512
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fmin.s a0, a0, a2
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fcvt.w.s a0, a0, rtz
 ; CHECK32-IZDINXZHINXMIN-NEXT:    and a0, a1, a0
@@ -546,15 +556,15 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; CHECK64-IZDINXZHINXMIN-LABEL: fcvt_si_h_sat:
 ; CHECK64-IZDINXZHINXMIN:       # %bb.0: # %start
 ; CHECK64-IZDINXZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECK64-IZDINXZHINXMIN-NEXT:    lui a1, 815104
-; CHECK64-IZDINXZHINXMIN-NEXT:    lui a2, %hi(.LCPI1_0)
-; CHECK64-IZDINXZHINXMIN-NEXT:    lw a2, %lo(.LCPI1_0)(a2)
-; CHECK64-IZDINXZHINXMIN-NEXT:    fmax.s a1, a0, a1
-; CHECK64-IZDINXZHINXMIN-NEXT:    feq.s a0, a0, a0
-; CHECK64-IZDINXZHINXMIN-NEXT:    neg a0, a0
-; CHECK64-IZDINXZHINXMIN-NEXT:    fmin.s a1, a1, a2
-; CHECK64-IZDINXZHINXMIN-NEXT:    fcvt.l.s a1, a1, rtz
-; CHECK64-IZDINXZHINXMIN-NEXT:    and a0, a0, a1
+; CHECK64-IZDINXZHINXMIN-NEXT:    feq.s a1, a0, a0
+; CHECK64-IZDINXZHINXMIN-NEXT:    neg a1, a1
+; CHECK64-IZDINXZHINXMIN-NEXT:    lui a2, 815104
+; CHECK64-IZDINXZHINXMIN-NEXT:    fmax.s a0, a0, a2
+; CHECK64-IZDINXZHINXMIN-NEXT:    lui a2, 290816
+; CHECK64-IZDINXZHINXMIN-NEXT:    addiw a2, a2, -512
+; CHECK64-IZDINXZHINXMIN-NEXT:    fmin.s a0, a0, a2
+; CHECK64-IZDINXZHINXMIN-NEXT:    fcvt.l.s a0, a0, rtz
+; CHECK64-IZDINXZHINXMIN-NEXT:    and a0, a1, a0
 ; CHECK64-IZDINXZHINXMIN-NEXT:    ret
 start:
   %0 = tail call i16 @llvm.fptosi.sat.i16.f16(half %a)
@@ -711,84 +721,88 @@ define i16 @fcvt_ui_h(half %a) nounwind {
 define i16 @fcvt_ui_h_sat(half %a) nounwind {
 ; RV32IZFH-LABEL: fcvt_ui_h_sat:
 ; RV32IZFH:       # %bb.0: # %start
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; RV32IZFH-NEXT:    fcvt.s.h fa4, fa0
-; RV32IZFH-NEXT:    fmv.w.x fa3, zero
-; RV32IZFH-NEXT:    fmax.s fa4, fa4, fa3
-; RV32IZFH-NEXT:    fmin.s fa5, fa4, fa5
+; RV32IZFH-NEXT:    fcvt.s.h fa5, fa0
+; RV32IZFH-NEXT:    fmv.w.x fa4, zero
+; RV32IZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV32IZFH-NEXT:    lui a0, 292864
+; RV32IZFH-NEXT:    addi a0, a0, -256
+; RV32IZFH-NEXT:    fmv.w.x fa4, a0
+; RV32IZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32IZFH-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV32IZFH-NEXT:    ret
 ;
 ; RV64IZFH-LABEL: fcvt_ui_h_sat:
 ; RV64IZFH:       # %bb.0: # %start
-; RV64IZFH-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV64IZFH-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; RV64IZFH-NEXT:    fcvt.s.h fa4, fa0
-; RV64IZFH-NEXT:    fmv.w.x fa3, zero
-; RV64IZFH-NEXT:    fmax.s fa4, fa4, fa3
-; RV64IZFH-NEXT:    fmin.s fa5, fa4, fa5
+; RV64IZFH-NEXT:    fcvt.s.h fa5, fa0
+; RV64IZFH-NEXT:    fmv.w.x fa4, zero
+; RV64IZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV64IZFH-NEXT:    lui a0, 292864
+; RV64IZFH-NEXT:    addi a0, a0, -256
+; RV64IZFH-NEXT:    fmv.w.x fa4, a0
+; RV64IZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64IZFH-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; RV64IZFH-NEXT:    ret
 ;
 ; RV32IDZFH-LABEL: fcvt_ui_h_sat:
 ; RV32IDZFH:       # %bb.0: # %start
-; RV32IDZFH-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32IDZFH-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; RV32IDZFH-NEXT:    fcvt.s.h fa4, fa0
-; RV32IDZFH-NEXT:    fmv.w.x fa3, zero
-; RV32IDZFH-NEXT:    fmax.s fa4, fa4, fa3
-; RV32IDZFH-NEXT:    fmin.s fa5, fa4, fa5
+; RV32IDZFH-NEXT:    fcvt.s.h fa5, fa0
+; RV32IDZFH-NEXT:    fmv.w.x fa4, zero
+; RV32IDZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV32IDZFH-NEXT:    lui a0, 292864
+; RV32IDZFH-NEXT:    addi a0, a0, -256
+; RV32IDZFH-NEXT:    fmv.w.x fa4, a0
+; RV32IDZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32IDZFH-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV32IDZFH-NEXT:    ret
 ;
 ; RV64IDZFH-LABEL: fcvt_ui_h_sat:
 ; RV64IDZFH:       # %bb.0: # %start
-; RV64IDZFH-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV64IDZFH-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; RV64IDZFH-NEXT:    fcvt.s.h fa4, fa0
-; RV64IDZFH-NEXT:    fmv.w.x fa3, zero
-; RV64IDZFH-NEXT:    fmax.s fa4, fa4, fa3
-; RV64IDZFH-NEXT:    fmin.s fa5, fa4, fa5
+; RV64IDZFH-NEXT:    fcvt.s.h fa5, fa0
+; RV64IDZFH-NEXT:    fmv.w.x fa4, zero
+; RV64IDZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV64IDZFH-NEXT:    lui a0, 292864
+; RV64IDZFH-NEXT:    addi a0, a0, -256
+; RV64IDZFH-NEXT:    fmv.w.x fa4, a0
+; RV64IDZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64IDZFH-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; RV64IDZFH-NEXT:    ret
 ;
 ; RV32IZHINX-LABEL: fcvt_ui_h_sat:
 ; RV32IZHINX:       # %bb.0: # %start
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI3_0)
-; RV32IZHINX-NEXT:    lw a1, %lo(.LCPI3_0)(a1)
 ; RV32IZHINX-NEXT:    fcvt.s.h a0, a0
 ; RV32IZHINX-NEXT:    fmax.s a0, a0, zero
+; RV32IZHINX-NEXT:    lui a1, 292864
+; RV32IZHINX-NEXT:    addi a1, a1, -256
 ; RV32IZHINX-NEXT:    fmin.s a0, a0, a1
 ; RV32IZHINX-NEXT:    fcvt.wu.s a0, a0, rtz
 ; RV32IZHINX-NEXT:    ret
 ;
 ; RV64IZHINX-LABEL: fcvt_ui_h_sat:
 ; RV64IZHINX:       # %bb.0: # %start
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI3_0)
-; RV64IZHINX-NEXT:    lw a1, %lo(.LCPI3_0)(a1)
 ; RV64IZHINX-NEXT:    fcvt.s.h a0, a0
 ; RV64IZHINX-NEXT:    fmax.s a0, a0, zero
+; RV64IZHINX-NEXT:    lui a1, 292864
+; RV64IZHINX-NEXT:    addiw a1, a1, -256
 ; RV64IZHINX-NEXT:    fmin.s a0, a0, a1
 ; RV64IZHINX-NEXT:    fcvt.lu.s a0, a0, rtz
 ; RV64IZHINX-NEXT:    ret
 ;
 ; RV32IZDINXZHINX-LABEL: fcvt_ui_h_sat:
 ; RV32IZDINXZHINX:       # %bb.0: # %start
-; RV32IZDINXZHINX-NEXT:    lui a1, %hi(.LCPI3_0)
-; RV32IZDINXZHINX-NEXT:    lw a1, %lo(.LCPI3_0)(a1)
 ; RV32IZDINXZHINX-NEXT:    fcvt.s.h a0, a0
 ; RV32IZDINXZHINX-NEXT:    fmax.s a0, a0, zero
+; RV32IZDINXZHINX-NEXT:    lui a1, 292864
+; RV32IZDINXZHINX-NEXT:    addi a1, a1, -256
 ; RV32IZDINXZHINX-NEXT:    fmin.s a0, a0, a1
 ; RV32IZDINXZHINX-NEXT:    fcvt.wu.s a0, a0, rtz
 ; RV32IZDINXZHINX-NEXT:    ret
 ;
 ; RV64IZDINXZHINX-LABEL: fcvt_ui_h_sat:
 ; RV64IZDINXZHINX:       # %bb.0: # %start
-; RV64IZDINXZHINX-NEXT:    lui a1, %hi(.LCPI3_0)
-; RV64IZDINXZHINX-NEXT:    lw a1, %lo(.LCPI3_0)(a1)
 ; RV64IZDINXZHINX-NEXT:    fcvt.s.h a0, a0
 ; RV64IZDINXZHINX-NEXT:    fmax.s a0, a0, zero
+; RV64IZDINXZHINX-NEXT:    lui a1, 292864
+; RV64IZDINXZHINX-NEXT:    addiw a1, a1, -256
 ; RV64IZDINXZHINX-NEXT:    fmin.s a0, a0, a1
 ; RV64IZDINXZHINX-NEXT:    fcvt.lu.s a0, a0, rtz
 ; RV64IZDINXZHINX-NEXT:    ret
@@ -874,12 +888,13 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind {
 ; RV32ID-ILP32-NEXT:    addi sp, sp, -16
 ; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32ID-ILP32-NEXT:    call __extendhfsf2@plt
-; RV32ID-ILP32-NEXT:    lui a1, %hi(.LCPI3_0)
-; RV32ID-ILP32-NEXT:    flw fa5, %lo(.LCPI3_0)(a1)
+; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a0
+; RV32ID-ILP32-NEXT:    fmv.w.x fa4, zero
+; RV32ID-ILP32-NEXT:    fmax.s fa5, fa5, fa4
+; RV32ID-ILP32-NEXT:    lui a0, 292864
+; RV32ID-ILP32-NEXT:    addi a0, a0, -256
 ; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a0
-; RV32ID-ILP32-NEXT:    fmv.w.x fa3, zero
-; RV32ID-ILP32-NEXT:    fmax.s fa4, fa4, fa3
-; RV32ID-ILP32-NEXT:    fmin.s fa5, fa4, fa5
+; RV32ID-ILP32-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32ID-ILP32-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32ID-ILP32-NEXT:    addi sp, sp, 16
@@ -890,12 +905,13 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind {
 ; RV64ID-LP64-NEXT:    addi sp, sp, -16
 ; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64ID-LP64-NEXT:    call __extendhfsf2@plt
-; RV64ID-LP64-NEXT:    lui a1, %hi(.LCPI3_0)
-; RV64ID-LP64-NEXT:    flw fa5, %lo(.LCPI3_0)(a1)
+; RV64ID-LP64-NEXT:    fmv.w.x fa5, a0
+; RV64ID-LP64-NEXT:    fmv.w.x fa4, zero
+; RV64ID-LP64-NEXT:    fmax.s fa5, fa5, fa4
+; RV64ID-LP64-NEXT:    lui a0, 292864
+; RV64ID-LP64-NEXT:    addi a0, a0, -256
 ; RV64ID-LP64-NEXT:    fmv.w.x fa4, a0
-; RV64ID-LP64-NEXT:    fmv.w.x fa3, zero
-; RV64ID-LP64-NEXT:    fmax.s fa4, fa4, fa3
-; RV64ID-LP64-NEXT:    fmin.s fa5, fa4, fa5
+; RV64ID-LP64-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64ID-LP64-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64ID-LP64-NEXT:    addi sp, sp, 16
@@ -906,11 +922,12 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind {
 ; RV32ID-NEXT:    addi sp, sp, -16
 ; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32ID-NEXT:    call __extendhfsf2@plt
-; RV32ID-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32ID-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; RV32ID-NEXT:    fmv.w.x fa4, zero
-; RV32ID-NEXT:    fmax.s fa4, fa0, fa4
-; RV32ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV32ID-NEXT:    fmv.w.x fa5, zero
+; RV32ID-NEXT:    fmax.s fa5, fa0, fa5
+; RV32ID-NEXT:    lui a0, 292864
+; RV32ID-NEXT:    addi a0, a0, -256
+; RV32ID-NEXT:    fmv.w.x fa4, a0
+; RV32ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32ID-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32ID-NEXT:    addi sp, sp, 16
@@ -921,11 +938,12 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind {
 ; RV64ID-NEXT:    addi sp, sp, -16
 ; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64ID-NEXT:    call __extendhfsf2@plt
-; RV64ID-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV64ID-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; RV64ID-NEXT:    fmv.w.x fa4, zero
-; RV64ID-NEXT:    fmax.s fa4, fa0, fa4
-; RV64ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV64ID-NEXT:    fmv.w.x fa5, zero
+; RV64ID-NEXT:    fmax.s fa5, fa0, fa5
+; RV64ID-NEXT:    lui a0, 292864
+; RV64ID-NEXT:    addi a0, a0, -256
+; RV64ID-NEXT:    fmv.w.x fa4, a0
+; RV64ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64ID-NEXT:    addi sp, sp, 16
@@ -933,62 +951,64 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind {
 ;
 ; CHECK32-IZFHMIN-LABEL: fcvt_ui_h_sat:
 ; CHECK32-IZFHMIN:       # %bb.0: # %start
-; CHECK32-IZFHMIN-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK32-IZFHMIN-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; CHECK32-IZFHMIN-NEXT:    fcvt.s.h fa4, fa0
-; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa3, zero
-; CHECK32-IZFHMIN-NEXT:    fmax.s fa4, fa4, fa3
-; CHECK32-IZFHMIN-NEXT:    fmin.s fa5, fa4, fa5
+; CHECK32-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa4, zero
+; CHECK32-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK32-IZFHMIN-NEXT:    lui a0, 292864
+; CHECK32-IZFHMIN-NEXT:    addi a0, a0, -256
+; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa4, a0
+; CHECK32-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK32-IZFHMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; CHECK32-IZFHMIN-NEXT:    ret
 ;
 ; CHECK64-IZFHMIN-LABEL: fcvt_ui_h_sat:
 ; CHECK64-IZFHMIN:       # %bb.0: # %start
-; CHECK64-IZFHMIN-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK64-IZFHMIN-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; CHECK64-IZFHMIN-NEXT:    fcvt.s.h fa4, fa0
-; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa3, zero
-; CHECK64-IZFHMIN-NEXT:    fmax.s fa4, fa4, fa3
-; CHECK64-IZFHMIN-NEXT:    fmin.s fa5, fa4, fa5
+; CHECK64-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa4, zero
+; CHECK64-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK64-IZFHMIN-NEXT:    lui a0, 292864
+; CHECK64-IZFHMIN-NEXT:    addi a0, a0, -256
+; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa4, a0
+; CHECK64-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK64-IZFHMIN-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; CHECK64-IZFHMIN-NEXT:    ret
 ;
 ; CHECK32-IZHINXMIN-LABEL: fcvt_ui_h_sat:
 ; CHECK32-IZHINXMIN:       # %bb.0: # %start
-; CHECK32-IZHINXMIN-NEXT:    lui a1, %hi(.LCPI3_0)
-; CHECK32-IZHINXMIN-NEXT:    lw a1, %lo(.LCPI3_0)(a1)
 ; CHECK32-IZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; CHECK32-IZHINXMIN-NEXT:    fmax.s a0, a0, zero
+; CHECK32-IZHINXMIN-NEXT:    lui a1, 292864
+; CHECK32-IZHINXMIN-NEXT:    addi a1, a1, -256
 ; CHECK32-IZHINXMIN-NEXT:    fmin.s a0, a0, a1
 ; CHECK32-IZHINXMIN-NEXT:    fcvt.wu.s a0, a0, rtz
 ; CHECK32-IZHINXMIN-NEXT:    ret
 ;
 ; CHECK64-IZHINXMIN-LABEL: fcvt_ui_h_sat:
 ; CHECK64-IZHINXMIN:       # %bb.0: # %start
-; CHECK64-IZHINXMIN-NEXT:    lui a1, %hi(.LCPI3_0)
-; CHECK64-IZHINXMIN-NEXT:    lw a1, %lo(.LCPI3_0)(a1)
 ; CHECK64-IZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; CHECK64-IZHINXMIN-NEXT:    fmax.s a0, a0, zero
+; CHECK64-IZHINXMIN-NEXT:    lui a1, 292864
+; CHECK64-IZHINXMIN-NEXT:    addiw a1, a1, -256
 ; CHECK64-IZHINXMIN-NEXT:    fmin.s a0, a0, a1
 ; CHECK64-IZHINXMIN-NEXT:    fcvt.lu.s a0, a0, rtz
 ; CHECK64-IZHINXMIN-NEXT:    ret
 ;
 ; CHECK32-IZDINXZHINXMIN-LABEL: fcvt_ui_h_sat:
 ; CHECK32-IZDINXZHINXMIN:       # %bb.0: # %start
-; CHECK32-IZDINXZHINXMIN-NEXT:    lui a1, %hi(.LCPI3_0)
-; CHECK32-IZDINXZHINXMIN-NEXT:    lw a1, %lo(.LCPI3_0)(a1)
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fmax.s a0, a0, zero
+; CHECK32-IZDINXZHINXMIN-NEXT:    lui a1, 292864
+; CHECK32-IZDINXZHINXMIN-NEXT:    addi a1, a1, -256
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fmin.s a0, a0, a1
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fcvt.wu.s a0, a0, rtz
 ; CHECK32-IZDINXZHINXMIN-NEXT:    ret
 ;
 ; CHECK64-IZDINXZHINXMIN-LABEL: fcvt_ui_h_sat:
 ; CHECK64-IZDINXZHINXMIN:       # %bb.0: # %start
-; CHECK64-IZDINXZHINXMIN-NEXT:    lui a1, %hi(.LCPI3_0)
-; CHECK64-IZDINXZHINXMIN-NEXT:    lw a1, %lo(.LCPI3_0)(a1)
 ; CHECK64-IZDINXZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; CHECK64-IZDINXZHINXMIN-NEXT:    fmax.s a0, a0, zero
+; CHECK64-IZDINXZHINXMIN-NEXT:    lui a1, 292864
+; CHECK64-IZDINXZHINXMIN-NEXT:    addiw a1, a1, -256
 ; CHECK64-IZDINXZHINXMIN-NEXT:    fmin.s a0, a0, a1
 ; CHECK64-IZDINXZHINXMIN-NEXT:    fcvt.lu.s a0, a0, rtz
 ; CHECK64-IZDINXZHINXMIN-NEXT:    ret
@@ -2161,8 +2181,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IZFH-NEXT:  # %bb.1: # %start
 ; RV32IZFH-NEXT:    mv a2, a1
 ; RV32IZFH-NEXT:  .LBB10_2: # %start
-; RV32IZFH-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32IZFH-NEXT:    lui a1, 389120
+; RV32IZFH-NEXT:    addi a1, a1, -1
+; RV32IZFH-NEXT:    fmv.w.x fa5, a1
 ; RV32IZFH-NEXT:    flt.s a3, fa5, fs0
 ; RV32IZFH-NEXT:    beqz a3, .LBB10_4
 ; RV32IZFH-NEXT:  # %bb.3:
@@ -2171,9 +2192,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IZFH-NEXT:    feq.s a1, fs0, fs0
 ; RV32IZFH-NEXT:    neg a4, a1
 ; RV32IZFH-NEXT:    and a1, a4, a2
+; RV32IZFH-NEXT:    neg a2, s0
+; RV32IZFH-NEXT:    and a0, a2, a0
 ; RV32IZFH-NEXT:    neg a2, a3
-; RV32IZFH-NEXT:    neg a3, s0
-; RV32IZFH-NEXT:    and a0, a3, a0
 ; RV32IZFH-NEXT:    or a0, a2, a0
 ; RV32IZFH-NEXT:    and a0, a4, a0
 ; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -2209,8 +2230,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IDZFH-NEXT:  # %bb.1: # %start
 ; RV32IDZFH-NEXT:    mv a2, a1
 ; RV32IDZFH-NEXT:  .LBB10_2: # %start
-; RV32IDZFH-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32IDZFH-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32IDZFH-NEXT:    lui a1, 389120
+; RV32IDZFH-NEXT:    addi a1, a1, -1
+; RV32IDZFH-NEXT:    fmv.w.x fa5, a1
 ; RV32IDZFH-NEXT:    flt.s a3, fa5, fs0
 ; RV32IDZFH-NEXT:    beqz a3, .LBB10_4
 ; RV32IDZFH-NEXT:  # %bb.3:
@@ -2219,9 +2241,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IDZFH-NEXT:    feq.s a1, fs0, fs0
 ; RV32IDZFH-NEXT:    neg a4, a1
 ; RV32IDZFH-NEXT:    and a1, a4, a2
+; RV32IDZFH-NEXT:    neg a2, s0
+; RV32IDZFH-NEXT:    and a0, a2, a0
 ; RV32IDZFH-NEXT:    neg a2, a3
-; RV32IDZFH-NEXT:    neg a3, s0
-; RV32IDZFH-NEXT:    and a0, a3, a0
 ; RV32IDZFH-NEXT:    or a0, a2, a0
 ; RV32IDZFH-NEXT:    and a0, a4, a0
 ; RV32IDZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -2248,10 +2270,10 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IZHINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32IZHINX-NEXT:    lw a1, %lo(.LCPI10_0)(a1)
 ; RV32IZHINX-NEXT:    fcvt.s.h s0, a0
-; RV32IZHINX-NEXT:    flt.s s1, a1, s0
+; RV32IZHINX-NEXT:    lui a0, 389120
+; RV32IZHINX-NEXT:    addi a0, a0, -1
+; RV32IZHINX-NEXT:    flt.s s1, a0, s0
 ; RV32IZHINX-NEXT:    neg s2, s1
 ; RV32IZHINX-NEXT:    lui a0, 913408
 ; RV32IZHINX-NEXT:    fle.s s3, a0, s0
@@ -2301,10 +2323,10 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IZDINXZHINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32IZDINXZHINX-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IZDINXZHINX-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32IZDINXZHINX-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32IZDINXZHINX-NEXT:    lw a1, %lo(.LCPI10_0)(a1)
 ; RV32IZDINXZHINX-NEXT:    fcvt.s.h s0, a0
-; RV32IZDINXZHINX-NEXT:    flt.s s1, a1, s0
+; RV32IZDINXZHINX-NEXT:    lui a0, 389120
+; RV32IZDINXZHINX-NEXT:    addi a0, a0, -1
+; RV32IZDINXZHINX-NEXT:    flt.s s1, a0, s0
 ; RV32IZDINXZHINX-NEXT:    neg s2, s1
 ; RV32IZDINXZHINX-NEXT:    lui a0, 913408
 ; RV32IZDINXZHINX-NEXT:    fle.s s3, a0, s0
@@ -2464,8 +2486,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32ID-ILP32-NEXT:  # %bb.1: # %start
 ; RV32ID-ILP32-NEXT:    mv a2, a1
 ; RV32ID-ILP32-NEXT:  .LBB10_2: # %start
-; RV32ID-ILP32-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32ID-ILP32-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32ID-ILP32-NEXT:    lui a1, 389120
+; RV32ID-ILP32-NEXT:    addi a1, a1, -1
+; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a1
 ; RV32ID-ILP32-NEXT:    flw fa4, 4(sp) # 4-byte Folded Reload
 ; RV32ID-ILP32-NEXT:    flt.s a3, fa5, fa4
 ; RV32ID-ILP32-NEXT:    fmv.s fa5, fa4
@@ -2519,8 +2542,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32ID-NEXT:  # %bb.1: # %start
 ; RV32ID-NEXT:    mv a2, a1
 ; RV32ID-NEXT:  .LBB10_2: # %start
-; RV32ID-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32ID-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32ID-NEXT:    lui a1, 389120
+; RV32ID-NEXT:    addi a1, a1, -1
+; RV32ID-NEXT:    fmv.w.x fa5, a1
 ; RV32ID-NEXT:    flt.s a3, fa5, fs0
 ; RV32ID-NEXT:    beqz a3, .LBB10_4
 ; RV32ID-NEXT:  # %bb.3:
@@ -2529,9 +2553,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32ID-NEXT:    feq.s a1, fs0, fs0
 ; RV32ID-NEXT:    neg a4, a1
 ; RV32ID-NEXT:    and a1, a4, a2
+; RV32ID-NEXT:    neg a2, s0
+; RV32ID-NEXT:    and a0, a2, a0
 ; RV32ID-NEXT:    neg a2, a3
-; RV32ID-NEXT:    neg a3, s0
-; RV32ID-NEXT:    and a0, a3, a0
 ; RV32ID-NEXT:    or a0, a2, a0
 ; RV32ID-NEXT:    and a0, a4, a0
 ; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -2572,8 +2596,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IFZFHMIN-NEXT:  # %bb.1: # %start
 ; RV32IFZFHMIN-NEXT:    mv a2, a1
 ; RV32IFZFHMIN-NEXT:  .LBB10_2: # %start
-; RV32IFZFHMIN-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32IFZFHMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32IFZFHMIN-NEXT:    lui a1, 389120
+; RV32IFZFHMIN-NEXT:    addi a1, a1, -1
+; RV32IFZFHMIN-NEXT:    fmv.w.x fa5, a1
 ; RV32IFZFHMIN-NEXT:    flt.s a3, fa5, fs0
 ; RV32IFZFHMIN-NEXT:    beqz a3, .LBB10_4
 ; RV32IFZFHMIN-NEXT:  # %bb.3:
@@ -2582,9 +2607,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IFZFHMIN-NEXT:    feq.s a1, fs0, fs0
 ; RV32IFZFHMIN-NEXT:    neg a4, a1
 ; RV32IFZFHMIN-NEXT:    and a1, a4, a2
+; RV32IFZFHMIN-NEXT:    neg a2, s0
+; RV32IFZFHMIN-NEXT:    and a0, a2, a0
 ; RV32IFZFHMIN-NEXT:    neg a2, a3
-; RV32IFZFHMIN-NEXT:    neg a3, s0
-; RV32IFZFHMIN-NEXT:    and a0, a3, a0
 ; RV32IFZFHMIN-NEXT:    or a0, a2, a0
 ; RV32IFZFHMIN-NEXT:    and a0, a4, a0
 ; RV32IFZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -2621,8 +2646,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IDZFHMIN-NEXT:  # %bb.1: # %start
 ; RV32IDZFHMIN-NEXT:    mv a2, a1
 ; RV32IDZFHMIN-NEXT:  .LBB10_2: # %start
-; RV32IDZFHMIN-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32IDZFHMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32IDZFHMIN-NEXT:    lui a1, 389120
+; RV32IDZFHMIN-NEXT:    addi a1, a1, -1
+; RV32IDZFHMIN-NEXT:    fmv.w.x fa5, a1
 ; RV32IDZFHMIN-NEXT:    flt.s a3, fa5, fs0
 ; RV32IDZFHMIN-NEXT:    beqz a3, .LBB10_4
 ; RV32IDZFHMIN-NEXT:  # %bb.3:
@@ -2631,9 +2657,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IDZFHMIN-NEXT:    feq.s a1, fs0, fs0
 ; RV32IDZFHMIN-NEXT:    neg a4, a1
 ; RV32IDZFHMIN-NEXT:    and a1, a4, a2
+; RV32IDZFHMIN-NEXT:    neg a2, s0
+; RV32IDZFHMIN-NEXT:    and a0, a2, a0
 ; RV32IDZFHMIN-NEXT:    neg a2, a3
-; RV32IDZFHMIN-NEXT:    neg a3, s0
-; RV32IDZFHMIN-NEXT:    and a0, a3, a0
 ; RV32IDZFHMIN-NEXT:    or a0, a2, a0
 ; RV32IDZFHMIN-NEXT:    and a0, a4, a0
 ; RV32IDZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -2651,10 +2677,10 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; CHECK32-IZHINXMIN-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; CHECK32-IZHINXMIN-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; CHECK32-IZHINXMIN-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; CHECK32-IZHINXMIN-NEXT:    lui a1, %hi(.LCPI10_0)
-; CHECK32-IZHINXMIN-NEXT:    lw a1, %lo(.LCPI10_0)(a1)
 ; CHECK32-IZHINXMIN-NEXT:    fcvt.s.h s0, a0
-; CHECK32-IZHINXMIN-NEXT:    flt.s s1, a1, s0
+; CHECK32-IZHINXMIN-NEXT:    lui a0, 389120
+; CHECK32-IZHINXMIN-NEXT:    addi a0, a0, -1
+; CHECK32-IZHINXMIN-NEXT:    flt.s s1, a0, s0
 ; CHECK32-IZHINXMIN-NEXT:    neg s2, s1
 ; CHECK32-IZHINXMIN-NEXT:    lui a0, 913408
 ; CHECK32-IZHINXMIN-NEXT:    fle.s s3, a0, s0
@@ -2705,10 +2731,10 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; CHECK32-IZDINXZHINXMIN-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; CHECK32-IZDINXZHINXMIN-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; CHECK32-IZDINXZHINXMIN-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; CHECK32-IZDINXZHINXMIN-NEXT:    lui a1, %hi(.LCPI10_0)
-; CHECK32-IZDINXZHINXMIN-NEXT:    lw a1, %lo(.LCPI10_0)(a1)
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fcvt.s.h s0, a0
-; CHECK32-IZDINXZHINXMIN-NEXT:    flt.s s1, a1, s0
+; CHECK32-IZDINXZHINXMIN-NEXT:    lui a0, 389120
+; CHECK32-IZDINXZHINXMIN-NEXT:    addi a0, a0, -1
+; CHECK32-IZDINXZHINXMIN-NEXT:    flt.s s1, a0, s0
 ; CHECK32-IZDINXZHINXMIN-NEXT:    neg s2, s1
 ; CHECK32-IZDINXZHINXMIN-NEXT:    lui a0, 913408
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fle.s s3, a0, s0
@@ -2931,23 +2957,25 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32IZFH-NEXT:    addi sp, sp, -16
 ; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI12_0)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI12_0)(a0)
-; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
-; RV32IZFH-NEXT:    flt.s a0, fa5, fa0
-; RV32IZFH-NEXT:    neg s0, a0
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fs0, fa0
 ; RV32IZFH-NEXT:    fmv.w.x fa5, zero
-; RV32IZFH-NEXT:    fle.s a0, fa5, fa0
-; RV32IZFH-NEXT:    neg s1, a0
+; RV32IZFH-NEXT:    fle.s a0, fa5, fs0
+; RV32IZFH-NEXT:    neg s0, a0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IZFH-NEXT:    call __fixunssfdi@plt
-; RV32IZFH-NEXT:    and a0, s1, a0
-; RV32IZFH-NEXT:    or a0, s0, a0
-; RV32IZFH-NEXT:    and a1, s1, a1
-; RV32IZFH-NEXT:    or a1, s0, a1
+; RV32IZFH-NEXT:    and a0, s0, a0
+; RV32IZFH-NEXT:    lui a2, 391168
+; RV32IZFH-NEXT:    addi a2, a2, -1
+; RV32IZFH-NEXT:    fmv.w.x fa5, a2
+; RV32IZFH-NEXT:    flt.s a2, fa5, fs0
+; RV32IZFH-NEXT:    neg a2, a2
+; RV32IZFH-NEXT:    or a0, a2, a0
+; RV32IZFH-NEXT:    and a1, s0, a1
+; RV32IZFH-NEXT:    or a1, a2, a1
 ; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    addi sp, sp, 16
 ; RV32IZFH-NEXT:    ret
 ;
@@ -2965,23 +2993,25 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32IDZFH-NEXT:    addi sp, sp, -16
 ; RV32IDZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IDZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IDZFH-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IDZFH-NEXT:    lui a0, %hi(.LCPI12_0)
-; RV32IDZFH-NEXT:    flw fa5, %lo(.LCPI12_0)(a0)
-; RV32IDZFH-NEXT:    fcvt.s.h fa0, fa0
-; RV32IDZFH-NEXT:    flt.s a0, fa5, fa0
-; RV32IDZFH-NEXT:    neg s0, a0
+; RV32IDZFH-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IDZFH-NEXT:    fcvt.s.h fs0, fa0
 ; RV32IDZFH-NEXT:    fmv.w.x fa5, zero
-; RV32IDZFH-NEXT:    fle.s a0, fa5, fa0
-; RV32IDZFH-NEXT:    neg s1, a0
+; RV32IDZFH-NEXT:    fle.s a0, fa5, fs0
+; RV32IDZFH-NEXT:    neg s0, a0
+; RV32IDZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IDZFH-NEXT:    call __fixunssfdi@plt
-; RV32IDZFH-NEXT:    and a0, s1, a0
-; RV32IDZFH-NEXT:    or a0, s0, a0
-; RV32IDZFH-NEXT:    and a1, s1, a1
-; RV32IDZFH-NEXT:    or a1, s0, a1
+; RV32IDZFH-NEXT:    and a0, s0, a0
+; RV32IDZFH-NEXT:    lui a2, 391168
+; RV32IDZFH-NEXT:    addi a2, a2, -1
+; RV32IDZFH-NEXT:    fmv.w.x fa5, a2
+; RV32IDZFH-NEXT:    flt.s a2, fa5, fs0
+; RV32IDZFH-NEXT:    neg a2, a2
+; RV32IDZFH-NEXT:    or a0, a2, a0
+; RV32IDZFH-NEXT:    and a1, s0, a1
+; RV32IDZFH-NEXT:    or a1, a2, a1
 ; RV32IDZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IDZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IDZFH-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; RV32IDZFH-NEXT:    addi sp, sp, 16
 ; RV32IDZFH-NEXT:    ret
 ;
@@ -3000,9 +3030,9 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32IZHINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI12_0)
-; RV32IZHINX-NEXT:    lw a1, %lo(.LCPI12_0)(a1)
 ; RV32IZHINX-NEXT:    fcvt.s.h a0, a0
+; RV32IZHINX-NEXT:    lui a1, 391168
+; RV32IZHINX-NEXT:    addi a1, a1, -1
 ; RV32IZHINX-NEXT:    flt.s a1, a1, a0
 ; RV32IZHINX-NEXT:    neg s0, a1
 ; RV32IZHINX-NEXT:    fle.s a1, zero, a0
@@ -3033,9 +3063,9 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32IZDINXZHINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZDINXZHINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZDINXZHINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZDINXZHINX-NEXT:    lui a1, %hi(.LCPI12_0)
-; RV32IZDINXZHINX-NEXT:    lw a1, %lo(.LCPI12_0)(a1)
 ; RV32IZDINXZHINX-NEXT:    fcvt.s.h a0, a0
+; RV32IZDINXZHINX-NEXT:    lui a1, 391168
+; RV32IZDINXZHINX-NEXT:    addi a1, a1, -1
 ; RV32IZDINXZHINX-NEXT:    flt.s a1, a1, a0
 ; RV32IZDINXZHINX-NEXT:    neg s0, a1
 ; RV32IZDINXZHINX-NEXT:    fle.s a1, zero, a0
@@ -3131,13 +3161,14 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32ID-ILP32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32ID-ILP32-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32ID-ILP32-NEXT:    call __extendhfsf2@plt
-; RV32ID-ILP32-NEXT:    lui a1, %hi(.LCPI12_0)
-; RV32ID-ILP32-NEXT:    flw fa5, %lo(.LCPI12_0)(a1)
-; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a0
-; RV32ID-ILP32-NEXT:    flt.s a1, fa5, fa4
+; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a0
+; RV32ID-ILP32-NEXT:    lui a1, 391168
+; RV32ID-ILP32-NEXT:    addi a1, a1, -1
+; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a1
+; RV32ID-ILP32-NEXT:    flt.s a1, fa4, fa5
 ; RV32ID-ILP32-NEXT:    neg s0, a1
-; RV32ID-ILP32-NEXT:    fmv.w.x fa5, zero
-; RV32ID-ILP32-NEXT:    fle.s a1, fa5, fa4
+; RV32ID-ILP32-NEXT:    fmv.w.x fa4, zero
+; RV32ID-ILP32-NEXT:    fle.s a1, fa4, fa5
 ; RV32ID-ILP32-NEXT:    neg s1, a1
 ; RV32ID-ILP32-NEXT:    call __fixunssfdi@plt
 ; RV32ID-ILP32-NEXT:    and a0, s1, a0
@@ -3170,23 +3201,25 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32ID-NEXT:    addi sp, sp, -16
 ; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32ID-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32ID-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32ID-NEXT:    call __extendhfsf2@plt
-; RV32ID-NEXT:    lui a0, %hi(.LCPI12_0)
-; RV32ID-NEXT:    flw fa5, %lo(.LCPI12_0)(a0)
-; RV32ID-NEXT:    flt.s a0, fa5, fa0
-; RV32ID-NEXT:    neg s0, a0
+; RV32ID-NEXT:    fmv.s fs0, fa0
 ; RV32ID-NEXT:    fmv.w.x fa5, zero
 ; RV32ID-NEXT:    fle.s a0, fa5, fa0
-; RV32ID-NEXT:    neg s1, a0
+; RV32ID-NEXT:    neg s0, a0
 ; RV32ID-NEXT:    call __fixunssfdi@plt
-; RV32ID-NEXT:    and a0, s1, a0
-; RV32ID-NEXT:    or a0, s0, a0
-; RV32ID-NEXT:    and a1, s1, a1
-; RV32ID-NEXT:    or a1, s0, a1
+; RV32ID-NEXT:    and a0, s0, a0
+; RV32ID-NEXT:    lui a2, 391168
+; RV32ID-NEXT:    addi a2, a2, -1
+; RV32ID-NEXT:    fmv.w.x fa5, a2
+; RV32ID-NEXT:    flt.s a2, fa5, fs0
+; RV32ID-NEXT:    neg a2, a2
+; RV32ID-NEXT:    or a0, a2, a0
+; RV32ID-NEXT:    and a1, s0, a1
+; RV32ID-NEXT:    or a1, a2, a1
 ; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32ID-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; RV32ID-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; RV32ID-NEXT:    addi sp, sp, 16
 ; RV32ID-NEXT:    ret
 ;
@@ -3204,30 +3237,32 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV64ID-NEXT:    addi sp, sp, 16
 ; RV64ID-NEXT:    ret
 ;
-; CHECK32-IZFHMIN-LABEL: fcvt_lu_h_sat:
-; CHECK32-IZFHMIN:       # %bb.0: # %start
-; CHECK32-IZFHMIN-NEXT:    addi sp, sp, -16
-; CHECK32-IZFHMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK32-IZFHMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; CHECK32-IZFHMIN-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; CHECK32-IZFHMIN-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK32-IZFHMIN-NEXT:    flw fa5, %lo(.LCPI12_0)(a0)
-; CHECK32-IZFHMIN-NEXT:    fcvt.s.h fa0, fa0
-; CHECK32-IZFHMIN-NEXT:    flt.s a0, fa5, fa0
-; CHECK32-IZFHMIN-NEXT:    neg s0, a0
-; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa5, zero
-; CHECK32-IZFHMIN-NEXT:    fle.s a0, fa5, fa0
-; CHECK32-IZFHMIN-NEXT:    neg s1, a0
-; CHECK32-IZFHMIN-NEXT:    call __fixunssfdi@plt
-; CHECK32-IZFHMIN-NEXT:    and a0, s1, a0
-; CHECK32-IZFHMIN-NEXT:    or a0, s0, a0
-; CHECK32-IZFHMIN-NEXT:    and a1, s1, a1
-; CHECK32-IZFHMIN-NEXT:    or a1, s0, a1
-; CHECK32-IZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK32-IZFHMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; CHECK32-IZFHMIN-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; CHECK32-IZFHMIN-NEXT:    addi sp, sp, 16
-; CHECK32-IZFHMIN-NEXT:    ret
+; RV32IFZFHMIN-LABEL: fcvt_lu_h_sat:
+; RV32IFZFHMIN:       # %bb.0: # %start
+; RV32IFZFHMIN-NEXT:    addi sp, sp, -16
+; RV32IFZFHMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT:    fcvt.s.h fs0, fa0
+; RV32IFZFHMIN-NEXT:    fmv.w.x fa5, zero
+; RV32IFZFHMIN-NEXT:    fle.s a0, fa5, fs0
+; RV32IFZFHMIN-NEXT:    neg s0, a0
+; RV32IFZFHMIN-NEXT:    fmv.s fa0, fs0
+; RV32IFZFHMIN-NEXT:    call __fixunssfdi@plt
+; RV32IFZFHMIN-NEXT:    and a0, s0, a0
+; RV32IFZFHMIN-NEXT:    lui a2, 391168
+; RV32IFZFHMIN-NEXT:    addi a2, a2, -1
+; RV32IFZFHMIN-NEXT:    fmv.w.x fa5, a2
+; RV32IFZFHMIN-NEXT:    flt.s a2, fa5, fs0
+; RV32IFZFHMIN-NEXT:    neg a2, a2
+; RV32IFZFHMIN-NEXT:    or a0, a2, a0
+; RV32IFZFHMIN-NEXT:    and a1, s0, a1
+; RV32IFZFHMIN-NEXT:    or a1, a2, a1
+; RV32IFZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT:    addi sp, sp, 16
+; RV32IFZFHMIN-NEXT:    ret
 ;
 ; CHECK64-IZFHMIN-LABEL: fcvt_lu_h_sat:
 ; CHECK64-IZFHMIN:       # %bb.0: # %start
@@ -3239,15 +3274,42 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; CHECK64-IZFHMIN-NEXT:    and a0, a1, a0
 ; CHECK64-IZFHMIN-NEXT:    ret
 ;
+; RV32IDZFHMIN-LABEL: fcvt_lu_h_sat:
+; RV32IDZFHMIN:       # %bb.0: # %start
+; RV32IDZFHMIN-NEXT:    addi sp, sp, -16
+; RV32IDZFHMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IDZFHMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IDZFHMIN-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IDZFHMIN-NEXT:    fcvt.s.h fs0, fa0
+; RV32IDZFHMIN-NEXT:    fmv.w.x fa5, zero
+; RV32IDZFHMIN-NEXT:    fle.s a0, fa5, fs0
+; RV32IDZFHMIN-NEXT:    neg s0, a0
+; RV32IDZFHMIN-NEXT:    fmv.s fa0, fs0
+; RV32IDZFHMIN-NEXT:    call __fixunssfdi@plt
+; RV32IDZFHMIN-NEXT:    and a0, s0, a0
+; RV32IDZFHMIN-NEXT:    lui a2, 391168
+; RV32IDZFHMIN-NEXT:    addi a2, a2, -1
+; RV32IDZFHMIN-NEXT:    fmv.w.x fa5, a2
+; RV32IDZFHMIN-NEXT:    flt.s a2, fa5, fs0
+; RV32IDZFHMIN-NEXT:    neg a2, a2
+; RV32IDZFHMIN-NEXT:    or a0, a2, a0
+; RV32IDZFHMIN-NEXT:    and a1, s0, a1
+; RV32IDZFHMIN-NEXT:    or a1, a2, a1
+; RV32IDZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IDZFHMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IDZFHMIN-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IDZFHMIN-NEXT:    addi sp, sp, 16
+; RV32IDZFHMIN-NEXT:    ret
+;
 ; CHECK32-IZHINXMIN-LABEL: fcvt_lu_h_sat:
 ; CHECK32-IZHINXMIN:       # %bb.0: # %start
 ; CHECK32-IZHINXMIN-NEXT:    addi sp, sp, -16
 ; CHECK32-IZHINXMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; CHECK32-IZHINXMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; CHECK32-IZHINXMIN-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; CHECK32-IZHINXMIN-NEXT:    lui a1, %hi(.LCPI12_0)
-; CHECK32-IZHINXMIN-NEXT:    lw a1, %lo(.LCPI12_0)(a1)
 ; CHECK32-IZHINXMIN-NEXT:    fcvt.s.h a0, a0
+; CHECK32-IZHINXMIN-NEXT:    lui a1, 391168
+; CHECK32-IZHINXMIN-NEXT:    addi a1, a1, -1
 ; CHECK32-IZHINXMIN-NEXT:    flt.s a1, a1, a0
 ; CHECK32-IZHINXMIN-NEXT:    neg s0, a1
 ; CHECK32-IZHINXMIN-NEXT:    fle.s a1, zero, a0
@@ -3279,9 +3341,9 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; CHECK32-IZDINXZHINXMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; CHECK32-IZDINXZHINXMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; CHECK32-IZDINXZHINXMIN-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; CHECK32-IZDINXZHINXMIN-NEXT:    lui a1, %hi(.LCPI12_0)
-; CHECK32-IZDINXZHINXMIN-NEXT:    lw a1, %lo(.LCPI12_0)(a1)
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fcvt.s.h a0, a0
+; CHECK32-IZDINXZHINXMIN-NEXT:    lui a1, 391168
+; CHECK32-IZDINXZHINXMIN-NEXT:    addi a1, a1, -1
 ; CHECK32-IZDINXZHINXMIN-NEXT:    flt.s a1, a1, a0
 ; CHECK32-IZDINXZHINXMIN-NEXT:    neg s0, a1
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fle.s a1, zero, a0
@@ -6367,11 +6429,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV32IZFH-NEXT:    fcvt.s.h fa5, fa0
 ; RV32IZFH-NEXT:    feq.s a0, fa5, fa5
 ; RV32IZFH-NEXT:    neg a0, a0
-; RV32IZFH-NEXT:    lui a1, %hi(.LCPI32_0)
-; RV32IZFH-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
 ; RV32IZFH-NEXT:    lui a1, 815104
-; RV32IZFH-NEXT:    fmv.w.x fa3, a1
-; RV32IZFH-NEXT:    fmax.s fa5, fa5, fa3
+; RV32IZFH-NEXT:    fmv.w.x fa4, a1
+; RV32IZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV32IZFH-NEXT:    lui a1, 290816
+; RV32IZFH-NEXT:    addi a1, a1, -512
+; RV32IZFH-NEXT:    fmv.w.x fa4, a1
 ; RV32IZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32IZFH-NEXT:    fcvt.w.s a1, fa5, rtz
 ; RV32IZFH-NEXT:    and a0, a0, a1
@@ -6381,12 +6444,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV64IZFH:       # %bb.0: # %start
 ; RV64IZFH-NEXT:    fcvt.s.h fa5, fa0
 ; RV64IZFH-NEXT:    feq.s a0, fa5, fa5
-; RV64IZFH-NEXT:    lui a1, %hi(.LCPI32_0)
-; RV64IZFH-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
-; RV64IZFH-NEXT:    lui a1, 815104
-; RV64IZFH-NEXT:    fmv.w.x fa3, a1
-; RV64IZFH-NEXT:    fmax.s fa5, fa5, fa3
 ; RV64IZFH-NEXT:    neg a0, a0
+; RV64IZFH-NEXT:    lui a1, 815104
+; RV64IZFH-NEXT:    fmv.w.x fa4, a1
+; RV64IZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV64IZFH-NEXT:    lui a1, 290816
+; RV64IZFH-NEXT:    addi a1, a1, -512
+; RV64IZFH-NEXT:    fmv.w.x fa4, a1
 ; RV64IZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64IZFH-NEXT:    fcvt.l.s a1, fa5, rtz
 ; RV64IZFH-NEXT:    and a0, a0, a1
@@ -6397,11 +6461,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV32IDZFH-NEXT:    fcvt.s.h fa5, fa0
 ; RV32IDZFH-NEXT:    feq.s a0, fa5, fa5
 ; RV32IDZFH-NEXT:    neg a0, a0
-; RV32IDZFH-NEXT:    lui a1, %hi(.LCPI32_0)
-; RV32IDZFH-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
 ; RV32IDZFH-NEXT:    lui a1, 815104
-; RV32IDZFH-NEXT:    fmv.w.x fa3, a1
-; RV32IDZFH-NEXT:    fmax.s fa5, fa5, fa3
+; RV32IDZFH-NEXT:    fmv.w.x fa4, a1
+; RV32IDZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV32IDZFH-NEXT:    lui a1, 290816
+; RV32IDZFH-NEXT:    addi a1, a1, -512
+; RV32IDZFH-NEXT:    fmv.w.x fa4, a1
 ; RV32IDZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32IDZFH-NEXT:    fcvt.w.s a1, fa5, rtz
 ; RV32IDZFH-NEXT:    and a0, a0, a1
@@ -6411,12 +6476,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV64IDZFH:       # %bb.0: # %start
 ; RV64IDZFH-NEXT:    fcvt.s.h fa5, fa0
 ; RV64IDZFH-NEXT:    feq.s a0, fa5, fa5
-; RV64IDZFH-NEXT:    lui a1, %hi(.LCPI32_0)
-; RV64IDZFH-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
-; RV64IDZFH-NEXT:    lui a1, 815104
-; RV64IDZFH-NEXT:    fmv.w.x fa3, a1
-; RV64IDZFH-NEXT:    fmax.s fa5, fa5, fa3
 ; RV64IDZFH-NEXT:    neg a0, a0
+; RV64IDZFH-NEXT:    lui a1, 815104
+; RV64IDZFH-NEXT:    fmv.w.x fa4, a1
+; RV64IDZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV64IDZFH-NEXT:    lui a1, 290816
+; RV64IDZFH-NEXT:    addi a1, a1, -512
+; RV64IDZFH-NEXT:    fmv.w.x fa4, a1
 ; RV64IDZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64IDZFH-NEXT:    fcvt.l.s a1, fa5, rtz
 ; RV64IDZFH-NEXT:    and a0, a0, a1
@@ -6426,11 +6492,11 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV32IZHINX:       # %bb.0: # %start
 ; RV32IZHINX-NEXT:    fcvt.s.h a0, a0
 ; RV32IZHINX-NEXT:    feq.s a1, a0, a0
-; RV32IZHINX-NEXT:    lui a2, %hi(.LCPI32_0)
-; RV32IZHINX-NEXT:    lw a2, %lo(.LCPI32_0)(a2)
 ; RV32IZHINX-NEXT:    neg a1, a1
-; RV32IZHINX-NEXT:    lui a3, 815104
-; RV32IZHINX-NEXT:    fmax.s a0, a0, a3
+; RV32IZHINX-NEXT:    lui a2, 815104
+; RV32IZHINX-NEXT:    fmax.s a0, a0, a2
+; RV32IZHINX-NEXT:    lui a2, 290816
+; RV32IZHINX-NEXT:    addi a2, a2, -512
 ; RV32IZHINX-NEXT:    fmin.s a0, a0, a2
 ; RV32IZHINX-NEXT:    fcvt.w.s a0, a0, rtz
 ; RV32IZHINX-NEXT:    and a0, a1, a0
@@ -6439,26 +6505,26 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV64IZHINX-LABEL: fcvt_w_s_sat_i16:
 ; RV64IZHINX:       # %bb.0: # %start
 ; RV64IZHINX-NEXT:    fcvt.s.h a0, a0
-; RV64IZHINX-NEXT:    lui a1, 815104
-; RV64IZHINX-NEXT:    lui a2, %hi(.LCPI32_0)
-; RV64IZHINX-NEXT:    lw a2, %lo(.LCPI32_0)(a2)
-; RV64IZHINX-NEXT:    fmax.s a1, a0, a1
-; RV64IZHINX-NEXT:    feq.s a0, a0, a0
-; RV64IZHINX-NEXT:    neg a0, a0
-; RV64IZHINX-NEXT:    fmin.s a1, a1, a2
-; RV64IZHINX-NEXT:    fcvt.l.s a1, a1, rtz
-; RV64IZHINX-NEXT:    and a0, a0, a1
+; RV64IZHINX-NEXT:    feq.s a1, a0, a0
+; RV64IZHINX-NEXT:    neg a1, a1
+; RV64IZHINX-NEXT:    lui a2, 815104
+; RV64IZHINX-NEXT:    fmax.s a0, a0, a2
+; RV64IZHINX-NEXT:    lui a2, 290816
+; RV64IZHINX-NEXT:    addiw a2, a2, -512
+; RV64IZHINX-NEXT:    fmin.s a0, a0, a2
+; RV64IZHINX-NEXT:    fcvt.l.s a0, a0, rtz
+; RV64IZHINX-NEXT:    and a0, a1, a0
 ; RV64IZHINX-NEXT:    ret
 ;
 ; RV32IZDINXZHINX-LABEL: fcvt_w_s_sat_i16:
 ; RV32IZDINXZHINX:       # %bb.0: # %start
 ; RV32IZDINXZHINX-NEXT:    fcvt.s.h a0, a0
 ; RV32IZDINXZHINX-NEXT:    feq.s a1, a0, a0
-; RV32IZDINXZHINX-NEXT:    lui a2, %hi(.LCPI32_0)
-; RV32IZDINXZHINX-NEXT:    lw a2, %lo(.LCPI32_0)(a2)
 ; RV32IZDINXZHINX-NEXT:    neg a1, a1
-; RV32IZDINXZHINX-NEXT:    lui a3, 815104
-; RV32IZDINXZHINX-NEXT:    fmax.s a0, a0, a3
+; RV32IZDINXZHINX-NEXT:    lui a2, 815104
+; RV32IZDINXZHINX-NEXT:    fmax.s a0, a0, a2
+; RV32IZDINXZHINX-NEXT:    lui a2, 290816
+; RV32IZDINXZHINX-NEXT:    addi a2, a2, -512
 ; RV32IZDINXZHINX-NEXT:    fmin.s a0, a0, a2
 ; RV32IZDINXZHINX-NEXT:    fcvt.w.s a0, a0, rtz
 ; RV32IZDINXZHINX-NEXT:    and a0, a1, a0
@@ -6467,15 +6533,15 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV64IZDINXZHINX-LABEL: fcvt_w_s_sat_i16:
 ; RV64IZDINXZHINX:       # %bb.0: # %start
 ; RV64IZDINXZHINX-NEXT:    fcvt.s.h a0, a0
-; RV64IZDINXZHINX-NEXT:    lui a1, 815104
-; RV64IZDINXZHINX-NEXT:    lui a2, %hi(.LCPI32_0)
-; RV64IZDINXZHINX-NEXT:    lw a2, %lo(.LCPI32_0)(a2)
-; RV64IZDINXZHINX-NEXT:    fmax.s a1, a0, a1
-; RV64IZDINXZHINX-NEXT:    feq.s a0, a0, a0
-; RV64IZDINXZHINX-NEXT:    neg a0, a0
-; RV64IZDINXZHINX-NEXT:    fmin.s a1, a1, a2
-; RV64IZDINXZHINX-NEXT:    fcvt.l.s a1, a1, rtz
-; RV64IZDINXZHINX-NEXT:    and a0, a0, a1
+; RV64IZDINXZHINX-NEXT:    feq.s a1, a0, a0
+; RV64IZDINXZHINX-NEXT:    neg a1, a1
+; RV64IZDINXZHINX-NEXT:    lui a2, 815104
+; RV64IZDINXZHINX-NEXT:    fmax.s a0, a0, a2
+; RV64IZDINXZHINX-NEXT:    lui a2, 290816
+; RV64IZDINXZHINX-NEXT:    addiw a2, a2, -512
+; RV64IZDINXZHINX-NEXT:    fmin.s a0, a0, a2
+; RV64IZDINXZHINX-NEXT:    fcvt.l.s a0, a0, rtz
+; RV64IZDINXZHINX-NEXT:    and a0, a1, a0
 ; RV64IZDINXZHINX-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i16:
@@ -6576,11 +6642,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a0
 ; RV32ID-ILP32-NEXT:    feq.s a0, fa5, fa5
 ; RV32ID-ILP32-NEXT:    neg a0, a0
-; RV32ID-ILP32-NEXT:    lui a1, %hi(.LCPI32_0)
-; RV32ID-ILP32-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
 ; RV32ID-ILP32-NEXT:    lui a1, 815104
-; RV32ID-ILP32-NEXT:    fmv.w.x fa3, a1
-; RV32ID-ILP32-NEXT:    fmax.s fa5, fa5, fa3
+; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a1
+; RV32ID-ILP32-NEXT:    fmax.s fa5, fa5, fa4
+; RV32ID-ILP32-NEXT:    lui a1, 290816
+; RV32ID-ILP32-NEXT:    addi a1, a1, -512
+; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a1
 ; RV32ID-ILP32-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32ID-ILP32-NEXT:    fcvt.w.s a1, fa5, rtz
 ; RV32ID-ILP32-NEXT:    and a0, a0, a1
@@ -6595,12 +6662,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV64ID-LP64-NEXT:    call __extendhfsf2@plt
 ; RV64ID-LP64-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-LP64-NEXT:    feq.s a0, fa5, fa5
-; RV64ID-LP64-NEXT:    lui a1, %hi(.LCPI32_0)
-; RV64ID-LP64-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
-; RV64ID-LP64-NEXT:    lui a1, 815104
-; RV64ID-LP64-NEXT:    fmv.w.x fa3, a1
-; RV64ID-LP64-NEXT:    fmax.s fa5, fa5, fa3
 ; RV64ID-LP64-NEXT:    neg a0, a0
+; RV64ID-LP64-NEXT:    lui a1, 815104
+; RV64ID-LP64-NEXT:    fmv.w.x fa4, a1
+; RV64ID-LP64-NEXT:    fmax.s fa5, fa5, fa4
+; RV64ID-LP64-NEXT:    lui a1, 290816
+; RV64ID-LP64-NEXT:    addi a1, a1, -512
+; RV64ID-LP64-NEXT:    fmv.w.x fa4, a1
 ; RV64ID-LP64-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64ID-LP64-NEXT:    fcvt.l.s a1, fa5, rtz
 ; RV64ID-LP64-NEXT:    and a0, a0, a1
@@ -6615,12 +6683,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV32ID-NEXT:    call __extendhfsf2@plt
 ; RV32ID-NEXT:    feq.s a0, fa0, fa0
 ; RV32ID-NEXT:    neg a0, a0
-; RV32ID-NEXT:    lui a1, %hi(.LCPI32_0)
-; RV32ID-NEXT:    flw fa5, %lo(.LCPI32_0)(a1)
 ; RV32ID-NEXT:    lui a1, 815104
+; RV32ID-NEXT:    fmv.w.x fa5, a1
+; RV32ID-NEXT:    fmax.s fa5, fa0, fa5
+; RV32ID-NEXT:    lui a1, 290816
+; RV32ID-NEXT:    addi a1, a1, -512
 ; RV32ID-NEXT:    fmv.w.x fa4, a1
-; RV32ID-NEXT:    fmax.s fa4, fa0, fa4
-; RV32ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV32ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32ID-NEXT:    fcvt.w.s a1, fa5, rtz
 ; RV32ID-NEXT:    and a0, a0, a1
 ; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -6633,13 +6702,14 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64ID-NEXT:    call __extendhfsf2@plt
 ; RV64ID-NEXT:    feq.s a0, fa0, fa0
-; RV64ID-NEXT:    lui a1, %hi(.LCPI32_0)
-; RV64ID-NEXT:    flw fa5, %lo(.LCPI32_0)(a1)
+; RV64ID-NEXT:    neg a0, a0
 ; RV64ID-NEXT:    lui a1, 815104
+; RV64ID-NEXT:    fmv.w.x fa5, a1
+; RV64ID-NEXT:    fmax.s fa5, fa0, fa5
+; RV64ID-NEXT:    lui a1, 290816
+; RV64ID-NEXT:    addi a1, a1, -512
 ; RV64ID-NEXT:    fmv.w.x fa4, a1
-; RV64ID-NEXT:    fmax.s fa4, fa0, fa4
-; RV64ID-NEXT:    neg a0, a0
-; RV64ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV64ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64ID-NEXT:    fcvt.l.s a1, fa5, rtz
 ; RV64ID-NEXT:    and a0, a0, a1
 ; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -6651,11 +6721,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; CHECK32-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; CHECK32-IZFHMIN-NEXT:    feq.s a0, fa5, fa5
 ; CHECK32-IZFHMIN-NEXT:    neg a0, a0
-; CHECK32-IZFHMIN-NEXT:    lui a1, %hi(.LCPI32_0)
-; CHECK32-IZFHMIN-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
 ; CHECK32-IZFHMIN-NEXT:    lui a1, 815104
-; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa3, a1
-; CHECK32-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa3
+; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK32-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK32-IZFHMIN-NEXT:    lui a1, 290816
+; CHECK32-IZFHMIN-NEXT:    addi a1, a1, -512
+; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa4, a1
 ; CHECK32-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK32-IZFHMIN-NEXT:    fcvt.w.s a1, fa5, rtz
 ; CHECK32-IZFHMIN-NEXT:    and a0, a0, a1
@@ -6665,12 +6736,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; CHECK64-IZFHMIN:       # %bb.0: # %start
 ; CHECK64-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; CHECK64-IZFHMIN-NEXT:    feq.s a0, fa5, fa5
-; CHECK64-IZFHMIN-NEXT:    lui a1, %hi(.LCPI32_0)
-; CHECK64-IZFHMIN-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
-; CHECK64-IZFHMIN-NEXT:    lui a1, 815104
-; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa3, a1
-; CHECK64-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa3
 ; CHECK64-IZFHMIN-NEXT:    neg a0, a0
+; CHECK64-IZFHMIN-NEXT:    lui a1, 815104
+; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK64-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK64-IZFHMIN-NEXT:    lui a1, 290816
+; CHECK64-IZFHMIN-NEXT:    addi a1, a1, -512
+; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa4, a1
 ; CHECK64-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK64-IZFHMIN-NEXT:    fcvt.l.s a1, fa5, rtz
 ; CHECK64-IZFHMIN-NEXT:    and a0, a0, a1
@@ -6680,11 +6752,11 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; CHECK32-IZHINXMIN:       # %bb.0: # %start
 ; CHECK32-IZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; CHECK32-IZHINXMIN-NEXT:    feq.s a1, a0, a0
-; CHECK32-IZHINXMIN-NEXT:    lui a2, %hi(.LCPI32_0)
-; CHECK32-IZHINXMIN-NEXT:    lw a2, %lo(.LCPI32_0)(a2)
 ; CHECK32-IZHINXMIN-NEXT:    neg a1, a1
-; CHECK32-IZHINXMIN-NEXT:    lui a3, 815104
-; CHECK32-IZHINXMIN-NEXT:    fmax.s a0, a0, a3
+; CHECK32-IZHINXMIN-NEXT:    lui a2, 815104
+; CHECK32-IZHINXMIN-NEXT:    fmax.s a0, a0, a2
+; CHECK32-IZHINXMIN-NEXT:    lui a2, 290816
+; CHECK32-IZHINXMIN-NEXT:    addi a2, a2, -512
 ; CHECK32-IZHINXMIN-NEXT:    fmin.s a0, a0, a2
 ; CHECK32-IZHINXMIN-NEXT:    fcvt.w.s a0, a0, rtz
 ; CHECK32-IZHINXMIN-NEXT:    and a0, a1, a0
@@ -6693,26 +6765,26 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; CHECK64-IZHINXMIN-LABEL: fcvt_w_s_sat_i16:
 ; CHECK64-IZHINXMIN:       # %bb.0: # %start
 ; CHECK64-IZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECK64-IZHINXMIN-NEXT:    lui a1, 815104
-; CHECK64-IZHINXMIN-NEXT:    lui a2, %hi(.LCPI32_0)
-; CHECK64-IZHINXMIN-NEXT:    lw a2, %lo(.LCPI32_0)(a2)
-; CHECK64-IZHINXMIN-NEXT:    fmax.s a1, a0, a1
-; CHECK64-IZHINXMIN-NEXT:    feq.s a0, a0, a0
-; CHECK64-IZHINXMIN-NEXT:    neg a0, a0
-; CHECK64-IZHINXMIN-NEXT:    fmin.s a1, a1, a2
-; CHECK64-IZHINXMIN-NEXT:    fcvt.l.s a1, a1, rtz
-; CHECK64-IZHINXMIN-NEXT:    and a0, a0, a1
+; CHECK64-IZHINXMIN-NEXT:    feq.s a1, a0, a0
+; CHECK64-IZHINXMIN-NEXT:    neg a1, a1
+; CHECK64-IZHINXMIN-NEXT:    lui a2, 815104
+; CHECK64-IZHINXMIN-NEXT:    fmax.s a0, a0, a2
+; CHECK64-IZHINXMIN-NEXT:    lui a2, 290816
+; CHECK64-IZHINXMIN-NEXT:    addiw a2, a2, -512
+; CHECK64-IZHINXMIN-NEXT:    fmin.s a0, a0, a2
+; CHECK64-IZHINXMIN-NEXT:    fcvt.l.s a0, a0, rtz
+; CHECK64-IZHINXMIN-NEXT:    and a0, a1, a0
 ; CHECK64-IZHINXMIN-NEXT:    ret
 ;
 ; CHECK32-IZDINXZHINXMIN-LABEL: fcvt_w_s_sat_i16:
 ; CHECK32-IZDINXZHINXMIN:       # %bb.0: # %start
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; CHECK32-IZDINXZHINXMIN-NEXT:    feq.s a1, a0, a0
-; CHECK32-IZDINXZHINXMIN-NEXT:    lui a2, %hi(.LCPI32_0)
-; CHECK32-IZDINXZHINXMIN-NEXT:    lw a2, %lo(.LCPI32_0)(a2)
 ; CHECK32-IZDINXZHINXMIN-NEXT:    neg a1, a1
-; CHECK32-IZDINXZHINXMIN-NEXT:    lui a3, 815104
-; CHECK32-IZDINXZHINXMIN-NEXT:    fmax.s a0, a0, a3
+; CHECK32-IZDINXZHINXMIN-NEXT:    lui a2, 815104
+; CHECK32-IZDINXZHINXMIN-NEXT:    fmax.s a0, a0, a2
+; CHECK32-IZDINXZHINXMIN-NEXT:    lui a2, 290816
+; CHECK32-IZDINXZHINXMIN-NEXT:    addi a2, a2, -512
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fmin.s a0, a0, a2
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fcvt.w.s a0, a0, rtz
 ; CHECK32-IZDINXZHINXMIN-NEXT:    and a0, a1, a0
@@ -6721,15 +6793,15 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; CHECK64-IZDINXZHINXMIN-LABEL: fcvt_w_s_sat_i16:
 ; CHECK64-IZDINXZHINXMIN:       # %bb.0: # %start
 ; CHECK64-IZDINXZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; CHECK64-IZDINXZHINXMIN-NEXT:    lui a1, 815104
-; CHECK64-IZDINXZHINXMIN-NEXT:    lui a2, %hi(.LCPI32_0)
-; CHECK64-IZDINXZHINXMIN-NEXT:    lw a2, %lo(.LCPI32_0)(a2)
-; CHECK64-IZDINXZHINXMIN-NEXT:    fmax.s a1, a0, a1
-; CHECK64-IZDINXZHINXMIN-NEXT:    feq.s a0, a0, a0
-; CHECK64-IZDINXZHINXMIN-NEXT:    neg a0, a0
-; CHECK64-IZDINXZHINXMIN-NEXT:    fmin.s a1, a1, a2
-; CHECK64-IZDINXZHINXMIN-NEXT:    fcvt.l.s a1, a1, rtz
-; CHECK64-IZDINXZHINXMIN-NEXT:    and a0, a0, a1
+; CHECK64-IZDINXZHINXMIN-NEXT:    feq.s a1, a0, a0
+; CHECK64-IZDINXZHINXMIN-NEXT:    neg a1, a1
+; CHECK64-IZDINXZHINXMIN-NEXT:    lui a2, 815104
+; CHECK64-IZDINXZHINXMIN-NEXT:    fmax.s a0, a0, a2
+; CHECK64-IZDINXZHINXMIN-NEXT:    lui a2, 290816
+; CHECK64-IZDINXZHINXMIN-NEXT:    addiw a2, a2, -512
+; CHECK64-IZDINXZHINXMIN-NEXT:    fmin.s a0, a0, a2
+; CHECK64-IZDINXZHINXMIN-NEXT:    fcvt.l.s a0, a0, rtz
+; CHECK64-IZDINXZHINXMIN-NEXT:    and a0, a1, a0
 ; CHECK64-IZDINXZHINXMIN-NEXT:    ret
 start:
   %0 = tail call i16 @llvm.fptosi.sat.i16.f16(half %a)
@@ -6885,84 +6957,88 @@ define zeroext i16 @fcvt_wu_s_i16(half %a) nounwind {
 define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind {
 ; RV32IZFH-LABEL: fcvt_wu_s_sat_i16:
 ; RV32IZFH:       # %bb.0: # %start
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI34_0)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI34_0)(a0)
-; RV32IZFH-NEXT:    fcvt.s.h fa4, fa0
-; RV32IZFH-NEXT:    fmv.w.x fa3, zero
-; RV32IZFH-NEXT:    fmax.s fa4, fa4, fa3
-; RV32IZFH-NEXT:    fmin.s fa5, fa4, fa5
+; RV32IZFH-NEXT:    fcvt.s.h fa5, fa0
+; RV32IZFH-NEXT:    fmv.w.x fa4, zero
+; RV32IZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV32IZFH-NEXT:    lui a0, 292864
+; RV32IZFH-NEXT:    addi a0, a0, -256
+; RV32IZFH-NEXT:    fmv.w.x fa4, a0
+; RV32IZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32IZFH-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV32IZFH-NEXT:    ret
 ;
 ; RV64IZFH-LABEL: fcvt_wu_s_sat_i16:
 ; RV64IZFH:       # %bb.0: # %start
-; RV64IZFH-NEXT:    lui a0, %hi(.LCPI34_0)
-; RV64IZFH-NEXT:    flw fa5, %lo(.LCPI34_0)(a0)
-; RV64IZFH-NEXT:    fcvt.s.h fa4, fa0
-; RV64IZFH-NEXT:    fmv.w.x fa3, zero
-; RV64IZFH-NEXT:    fmax.s fa4, fa4, fa3
-; RV64IZFH-NEXT:    fmin.s fa5, fa4, fa5
+; RV64IZFH-NEXT:    fcvt.s.h fa5, fa0
+; RV64IZFH-NEXT:    fmv.w.x fa4, zero
+; RV64IZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV64IZFH-NEXT:    lui a0, 292864
+; RV64IZFH-NEXT:    addi a0, a0, -256
+; RV64IZFH-NEXT:    fmv.w.x fa4, a0
+; RV64IZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64IZFH-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; RV64IZFH-NEXT:    ret
 ;
 ; RV32IDZFH-LABEL: fcvt_wu_s_sat_i16:
 ; RV32IDZFH:       # %bb.0: # %start
-; RV32IDZFH-NEXT:    lui a0, %hi(.LCPI34_0)
-; RV32IDZFH-NEXT:    flw fa5, %lo(.LCPI34_0)(a0)
-; RV32IDZFH-NEXT:    fcvt.s.h fa4, fa0
-; RV32IDZFH-NEXT:    fmv.w.x fa3, zero
-; RV32IDZFH-NEXT:    fmax.s fa4, fa4, fa3
-; RV32IDZFH-NEXT:    fmin.s fa5, fa4, fa5
+; RV32IDZFH-NEXT:    fcvt.s.h fa5, fa0
+; RV32IDZFH-NEXT:    fmv.w.x fa4, zero
+; RV32IDZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV32IDZFH-NEXT:    lui a0, 292864
+; RV32IDZFH-NEXT:    addi a0, a0, -256
+; RV32IDZFH-NEXT:    fmv.w.x fa4, a0
+; RV32IDZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32IDZFH-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV32IDZFH-NEXT:    ret
 ;
 ; RV64IDZFH-LABEL: fcvt_wu_s_sat_i16:
 ; RV64IDZFH:       # %bb.0: # %start
-; RV64IDZFH-NEXT:    lui a0, %hi(.LCPI34_0)
-; RV64IDZFH-NEXT:    flw fa5, %lo(.LCPI34_0)(a0)
-; RV64IDZFH-NEXT:    fcvt.s.h fa4, fa0
-; RV64IDZFH-NEXT:    fmv.w.x fa3, zero
-; RV64IDZFH-NEXT:    fmax.s fa4, fa4, fa3
-; RV64IDZFH-NEXT:    fmin.s fa5, fa4, fa5
+; RV64IDZFH-NEXT:    fcvt.s.h fa5, fa0
+; RV64IDZFH-NEXT:    fmv.w.x fa4, zero
+; RV64IDZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV64IDZFH-NEXT:    lui a0, 292864
+; RV64IDZFH-NEXT:    addi a0, a0, -256
+; RV64IDZFH-NEXT:    fmv.w.x fa4, a0
+; RV64IDZFH-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64IDZFH-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; RV64IDZFH-NEXT:    ret
 ;
 ; RV32IZHINX-LABEL: fcvt_wu_s_sat_i16:
 ; RV32IZHINX:       # %bb.0: # %start
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI34_0)
-; RV32IZHINX-NEXT:    lw a1, %lo(.LCPI34_0)(a1)
 ; RV32IZHINX-NEXT:    fcvt.s.h a0, a0
 ; RV32IZHINX-NEXT:    fmax.s a0, a0, zero
+; RV32IZHINX-NEXT:    lui a1, 292864
+; RV32IZHINX-NEXT:    addi a1, a1, -256
 ; RV32IZHINX-NEXT:    fmin.s a0, a0, a1
 ; RV32IZHINX-NEXT:    fcvt.wu.s a0, a0, rtz
 ; RV32IZHINX-NEXT:    ret
 ;
 ; RV64IZHINX-LABEL: fcvt_wu_s_sat_i16:
 ; RV64IZHINX:       # %bb.0: # %start
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI34_0)
-; RV64IZHINX-NEXT:    lw a1, %lo(.LCPI34_0)(a1)
 ; RV64IZHINX-NEXT:    fcvt.s.h a0, a0
 ; RV64IZHINX-NEXT:    fmax.s a0, a0, zero
+; RV64IZHINX-NEXT:    lui a1, 292864
+; RV64IZHINX-NEXT:    addiw a1, a1, -256
 ; RV64IZHINX-NEXT:    fmin.s a0, a0, a1
 ; RV64IZHINX-NEXT:    fcvt.lu.s a0, a0, rtz
 ; RV64IZHINX-NEXT:    ret
 ;
 ; RV32IZDINXZHINX-LABEL: fcvt_wu_s_sat_i16:
 ; RV32IZDINXZHINX:       # %bb.0: # %start
-; RV32IZDINXZHINX-NEXT:    lui a1, %hi(.LCPI34_0)
-; RV32IZDINXZHINX-NEXT:    lw a1, %lo(.LCPI34_0)(a1)
 ; RV32IZDINXZHINX-NEXT:    fcvt.s.h a0, a0
 ; RV32IZDINXZHINX-NEXT:    fmax.s a0, a0, zero
+; RV32IZDINXZHINX-NEXT:    lui a1, 292864
+; RV32IZDINXZHINX-NEXT:    addi a1, a1, -256
 ; RV32IZDINXZHINX-NEXT:    fmin.s a0, a0, a1
 ; RV32IZDINXZHINX-NEXT:    fcvt.wu.s a0, a0, rtz
 ; RV32IZDINXZHINX-NEXT:    ret
 ;
 ; RV64IZDINXZHINX-LABEL: fcvt_wu_s_sat_i16:
 ; RV64IZDINXZHINX:       # %bb.0: # %start
-; RV64IZDINXZHINX-NEXT:    lui a1, %hi(.LCPI34_0)
-; RV64IZDINXZHINX-NEXT:    lw a1, %lo(.LCPI34_0)(a1)
 ; RV64IZDINXZHINX-NEXT:    fcvt.s.h a0, a0
 ; RV64IZDINXZHINX-NEXT:    fmax.s a0, a0, zero
+; RV64IZDINXZHINX-NEXT:    lui a1, 292864
+; RV64IZDINXZHINX-NEXT:    addiw a1, a1, -256
 ; RV64IZDINXZHINX-NEXT:    fmin.s a0, a0, a1
 ; RV64IZDINXZHINX-NEXT:    fcvt.lu.s a0, a0, rtz
 ; RV64IZDINXZHINX-NEXT:    ret
@@ -7054,12 +7130,13 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind {
 ; RV32ID-ILP32-NEXT:    addi sp, sp, -16
 ; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32ID-ILP32-NEXT:    call __extendhfsf2@plt
-; RV32ID-ILP32-NEXT:    lui a1, %hi(.LCPI34_0)
-; RV32ID-ILP32-NEXT:    flw fa5, %lo(.LCPI34_0)(a1)
+; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a0
+; RV32ID-ILP32-NEXT:    fmv.w.x fa4, zero
+; RV32ID-ILP32-NEXT:    fmax.s fa5, fa5, fa4
+; RV32ID-ILP32-NEXT:    lui a0, 292864
+; RV32ID-ILP32-NEXT:    addi a0, a0, -256
 ; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a0
-; RV32ID-ILP32-NEXT:    fmv.w.x fa3, zero
-; RV32ID-ILP32-NEXT:    fmax.s fa4, fa4, fa3
-; RV32ID-ILP32-NEXT:    fmin.s fa5, fa4, fa5
+; RV32ID-ILP32-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32ID-ILP32-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32ID-ILP32-NEXT:    addi sp, sp, 16
@@ -7070,12 +7147,13 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind {
 ; RV64ID-LP64-NEXT:    addi sp, sp, -16
 ; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64ID-LP64-NEXT:    call __extendhfsf2@plt
-; RV64ID-LP64-NEXT:    lui a1, %hi(.LCPI34_0)
-; RV64ID-LP64-NEXT:    flw fa5, %lo(.LCPI34_0)(a1)
+; RV64ID-LP64-NEXT:    fmv.w.x fa5, a0
+; RV64ID-LP64-NEXT:    fmv.w.x fa4, zero
+; RV64ID-LP64-NEXT:    fmax.s fa5, fa5, fa4
+; RV64ID-LP64-NEXT:    lui a0, 292864
+; RV64ID-LP64-NEXT:    addi a0, a0, -256
 ; RV64ID-LP64-NEXT:    fmv.w.x fa4, a0
-; RV64ID-LP64-NEXT:    fmv.w.x fa3, zero
-; RV64ID-LP64-NEXT:    fmax.s fa4, fa4, fa3
-; RV64ID-LP64-NEXT:    fmin.s fa5, fa4, fa5
+; RV64ID-LP64-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64ID-LP64-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64ID-LP64-NEXT:    addi sp, sp, 16
@@ -7086,11 +7164,12 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind {
 ; RV32ID-NEXT:    addi sp, sp, -16
 ; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32ID-NEXT:    call __extendhfsf2@plt
-; RV32ID-NEXT:    lui a0, %hi(.LCPI34_0)
-; RV32ID-NEXT:    flw fa5, %lo(.LCPI34_0)(a0)
-; RV32ID-NEXT:    fmv.w.x fa4, zero
-; RV32ID-NEXT:    fmax.s fa4, fa0, fa4
-; RV32ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV32ID-NEXT:    fmv.w.x fa5, zero
+; RV32ID-NEXT:    fmax.s fa5, fa0, fa5
+; RV32ID-NEXT:    lui a0, 292864
+; RV32ID-NEXT:    addi a0, a0, -256
+; RV32ID-NEXT:    fmv.w.x fa4, a0
+; RV32ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32ID-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32ID-NEXT:    addi sp, sp, 16
@@ -7101,11 +7180,12 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind {
 ; RV64ID-NEXT:    addi sp, sp, -16
 ; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64ID-NEXT:    call __extendhfsf2@plt
-; RV64ID-NEXT:    lui a0, %hi(.LCPI34_0)
-; RV64ID-NEXT:    flw fa5, %lo(.LCPI34_0)(a0)
-; RV64ID-NEXT:    fmv.w.x fa4, zero
-; RV64ID-NEXT:    fmax.s fa4, fa0, fa4
-; RV64ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV64ID-NEXT:    fmv.w.x fa5, zero
+; RV64ID-NEXT:    fmax.s fa5, fa0, fa5
+; RV64ID-NEXT:    lui a0, 292864
+; RV64ID-NEXT:    addi a0, a0, -256
+; RV64ID-NEXT:    fmv.w.x fa4, a0
+; RV64ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64ID-NEXT:    addi sp, sp, 16
@@ -7113,62 +7193,64 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind {
 ;
 ; CHECK32-IZFHMIN-LABEL: fcvt_wu_s_sat_i16:
 ; CHECK32-IZFHMIN:       # %bb.0: # %start
-; CHECK32-IZFHMIN-NEXT:    lui a0, %hi(.LCPI34_0)
-; CHECK32-IZFHMIN-NEXT:    flw fa5, %lo(.LCPI34_0)(a0)
-; CHECK32-IZFHMIN-NEXT:    fcvt.s.h fa4, fa0
-; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa3, zero
-; CHECK32-IZFHMIN-NEXT:    fmax.s fa4, fa4, fa3
-; CHECK32-IZFHMIN-NEXT:    fmin.s fa5, fa4, fa5
+; CHECK32-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa4, zero
+; CHECK32-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK32-IZFHMIN-NEXT:    lui a0, 292864
+; CHECK32-IZFHMIN-NEXT:    addi a0, a0, -256
+; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa4, a0
+; CHECK32-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK32-IZFHMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; CHECK32-IZFHMIN-NEXT:    ret
 ;
 ; CHECK64-IZFHMIN-LABEL: fcvt_wu_s_sat_i16:
 ; CHECK64-IZFHMIN:       # %bb.0: # %start
-; CHECK64-IZFHMIN-NEXT:    lui a0, %hi(.LCPI34_0)
-; CHECK64-IZFHMIN-NEXT:    flw fa5, %lo(.LCPI34_0)(a0)
-; CHECK64-IZFHMIN-NEXT:    fcvt.s.h fa4, fa0
-; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa3, zero
-; CHECK64-IZFHMIN-NEXT:    fmax.s fa4, fa4, fa3
-; CHECK64-IZFHMIN-NEXT:    fmin.s fa5, fa4, fa5
+; CHECK64-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa4, zero
+; CHECK64-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK64-IZFHMIN-NEXT:    lui a0, 292864
+; CHECK64-IZFHMIN-NEXT:    addi a0, a0, -256
+; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa4, a0
+; CHECK64-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK64-IZFHMIN-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; CHECK64-IZFHMIN-NEXT:    ret
 ;
 ; CHECK32-IZHINXMIN-LABEL: fcvt_wu_s_sat_i16:
 ; CHECK32-IZHINXMIN:       # %bb.0: # %start
-; CHECK32-IZHINXMIN-NEXT:    lui a1, %hi(.LCPI34_0)
-; CHECK32-IZHINXMIN-NEXT:    lw a1, %lo(.LCPI34_0)(a1)
 ; CHECK32-IZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; CHECK32-IZHINXMIN-NEXT:    fmax.s a0, a0, zero
+; CHECK32-IZHINXMIN-NEXT:    lui a1, 292864
+; CHECK32-IZHINXMIN-NEXT:    addi a1, a1, -256
 ; CHECK32-IZHINXMIN-NEXT:    fmin.s a0, a0, a1
 ; CHECK32-IZHINXMIN-NEXT:    fcvt.wu.s a0, a0, rtz
 ; CHECK32-IZHINXMIN-NEXT:    ret
 ;
 ; CHECK64-IZHINXMIN-LABEL: fcvt_wu_s_sat_i16:
 ; CHECK64-IZHINXMIN:       # %bb.0: # %start
-; CHECK64-IZHINXMIN-NEXT:    lui a1, %hi(.LCPI34_0)
-; CHECK64-IZHINXMIN-NEXT:    lw a1, %lo(.LCPI34_0)(a1)
 ; CHECK64-IZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; CHECK64-IZHINXMIN-NEXT:    fmax.s a0, a0, zero
+; CHECK64-IZHINXMIN-NEXT:    lui a1, 292864
+; CHECK64-IZHINXMIN-NEXT:    addiw a1, a1, -256
 ; CHECK64-IZHINXMIN-NEXT:    fmin.s a0, a0, a1
 ; CHECK64-IZHINXMIN-NEXT:    fcvt.lu.s a0, a0, rtz
 ; CHECK64-IZHINXMIN-NEXT:    ret
 ;
 ; CHECK32-IZDINXZHINXMIN-LABEL: fcvt_wu_s_sat_i16:
 ; CHECK32-IZDINXZHINXMIN:       # %bb.0: # %start
-; CHECK32-IZDINXZHINXMIN-NEXT:    lui a1, %hi(.LCPI34_0)
-; CHECK32-IZDINXZHINXMIN-NEXT:    lw a1, %lo(.LCPI34_0)(a1)
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fmax.s a0, a0, zero
+; CHECK32-IZDINXZHINXMIN-NEXT:    lui a1, 292864
+; CHECK32-IZDINXZHINXMIN-NEXT:    addi a1, a1, -256
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fmin.s a0, a0, a1
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fcvt.wu.s a0, a0, rtz
 ; CHECK32-IZDINXZHINXMIN-NEXT:    ret
 ;
 ; CHECK64-IZDINXZHINXMIN-LABEL: fcvt_wu_s_sat_i16:
 ; CHECK64-IZDINXZHINXMIN:       # %bb.0: # %start
-; CHECK64-IZDINXZHINXMIN-NEXT:    lui a1, %hi(.LCPI34_0)
-; CHECK64-IZDINXZHINXMIN-NEXT:    lw a1, %lo(.LCPI34_0)(a1)
 ; CHECK64-IZDINXZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; CHECK64-IZDINXZHINXMIN-NEXT:    fmax.s a0, a0, zero
+; CHECK64-IZDINXZHINXMIN-NEXT:    lui a1, 292864
+; CHECK64-IZDINXZHINXMIN-NEXT:    addiw a1, a1, -256
 ; CHECK64-IZDINXZHINXMIN-NEXT:    fmin.s a0, a0, a1
 ; CHECK64-IZDINXZHINXMIN-NEXT:    fcvt.lu.s a0, a0, rtz
 ; CHECK64-IZDINXZHINXMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/half-imm.ll b/llvm/test/CodeGen/RISCV/half-imm.ll
index 9c11010540e15de..b4ce2c7047d5128 100644
--- a/llvm/test/CodeGen/RISCV/half-imm.ll
+++ b/llvm/test/CodeGen/RISCV/half-imm.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+zfh -verify-machineinstrs \
-; RUN:   -target-abi ilp32f < %s | FileCheck %s
+; RUN:   -target-abi ilp32f < %s | FileCheck --check-prefixes=CHECK %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \
-; RUN:   -target-abi lp64f < %s | FileCheck %s
+; RUN:   -target-abi lp64f < %s | FileCheck --check-prefixes=CHECK %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zhinx -verify-machineinstrs \
 ; RUN:   -target-abi ilp32 < %s \
 ; RUN:   | FileCheck -check-prefix=RV32IZHINX %s
@@ -21,58 +21,55 @@
 ; RUN:   | FileCheck -check-prefixes=CHECKIZHINXMIN %s
 
 ; TODO: constant pool shouldn't be necessary for RV32IZfh and RV64IZfh
-define half @half_imm() nounwind {
+define half @half_imm() nounwind {
 ; CHECK-LABEL: half_imm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa0, %lo(.LCPI0_0)(a0)
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    addi a0, a0, 512
+; CHECK-NEXT:    fmv.h.x fa0, a0
 ; CHECK-NEXT:    ret
 ;
 ; RV32IZHINX-LABEL: half_imm:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a0, %hi(.LCPI0_0)
-; RV32IZHINX-NEXT:    lh a0, %lo(.LCPI0_0)(a0)
+; RV32IZHINX-NEXT:    lui a0, 4
+; RV32IZHINX-NEXT:    addi a0, a0, 512
 ; RV32IZHINX-NEXT:    ret
 ;
 ; RV64IZHINX-LABEL: half_imm:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a0, %hi(.LCPI0_0)
-; RV64IZHINX-NEXT:    lh a0, %lo(.LCPI0_0)(a0)
+; RV64IZHINX-NEXT:    lui a0, 4
+; RV64IZHINX-NEXT:    addiw a0, a0, 512
 ; RV64IZHINX-NEXT:    ret
 ;
 ; CHECKIZFHMIN-LABEL: half_imm:
 ; CHECKIZFHMIN:       # %bb.0:
-; CHECKIZFHMIN-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECKIZFHMIN-NEXT:    flh fa0, %lo(.LCPI0_0)(a0)
+; CHECKIZFHMIN-NEXT:    lui a0, 4
+; CHECKIZFHMIN-NEXT:    addi a0, a0, 512
+; CHECKIZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; CHECKIZFHMIN-NEXT:    ret
-;
-; CHECKIZHINXMIN-LABEL: half_imm:
-; CHECKIZHINXMIN:       # %bb.0:
-; CHECKIZHINXMIN-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECKIZHINXMIN-NEXT:    lh a0, %lo(.LCPI0_0)(a0)
-; CHECKIZHINXMIN-NEXT:    ret
   ret half 3.0
 }
 
 define half @half_imm_op(half %a) nounwind {
 ; CHECK-LABEL: half_imm_op:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
+; CHECK-NEXT:    li a0, 15
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    fadd.h fa0, fa0, fa5
 ; CHECK-NEXT:    ret
 ;
 ; RV32IZHINX-LABEL: half_imm_op:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI1_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 15
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fadd.h a0, a0, a1
 ; RV32IZHINX-NEXT:    ret
 ;
 ; RV64IZHINX-LABEL: half_imm_op:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI1_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 15
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fadd.h a0, a0, a1
 ; RV64IZHINX-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
index 2d4b7538c34d135..c56f000547b5299 100644
--- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
@@ -2127,8 +2127,9 @@ declare half @llvm.floor.f16(half)
 define half @floor_f16(half %a) nounwind {
 ; CHECKIZFH-LABEL: floor_f16:
 ; CHECKIZFH:       # %bb.0:
-; CHECKIZFH-NEXT:    lui a0, %hi(.LCPI17_0)
-; CHECKIZFH-NEXT:    flh fa5, %lo(.LCPI17_0)(a0)
+; CHECKIZFH-NEXT:    li a0, 25
+; CHECKIZFH-NEXT:    slli a0, a0, 10
+; CHECKIZFH-NEXT:    fmv.h.x fa5, a0
 ; CHECKIZFH-NEXT:    fabs.h fa4, fa0
 ; CHECKIZFH-NEXT:    flt.h a0, fa4, fa5
 ; CHECKIZFH-NEXT:    beqz a0, .LBB17_2
@@ -2141,8 +2142,8 @@ define half @floor_f16(half %a) nounwind {
 ;
 ; CHECKIZHINX-LABEL: floor_f16:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI17_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI17_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB17_2
@@ -2218,8 +2219,9 @@ declare half @llvm.ceil.f16(half)
 define half @ceil_f16(half %a) nounwind {
 ; CHECKIZFH-LABEL: ceil_f16:
 ; CHECKIZFH:       # %bb.0:
-; CHECKIZFH-NEXT:    lui a0, %hi(.LCPI18_0)
-; CHECKIZFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a0)
+; CHECKIZFH-NEXT:    li a0, 25
+; CHECKIZFH-NEXT:    slli a0, a0, 10
+; CHECKIZFH-NEXT:    fmv.h.x fa5, a0
 ; CHECKIZFH-NEXT:    fabs.h fa4, fa0
 ; CHECKIZFH-NEXT:    flt.h a0, fa4, fa5
 ; CHECKIZFH-NEXT:    beqz a0, .LBB18_2
@@ -2232,8 +2234,8 @@ define half @ceil_f16(half %a) nounwind {
 ;
 ; CHECKIZHINX-LABEL: ceil_f16:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI18_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI18_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB18_2
@@ -2309,8 +2311,9 @@ declare half @llvm.trunc.f16(half)
 define half @trunc_f16(half %a) nounwind {
 ; CHECKIZFH-LABEL: trunc_f16:
 ; CHECKIZFH:       # %bb.0:
-; CHECKIZFH-NEXT:    lui a0, %hi(.LCPI19_0)
-; CHECKIZFH-NEXT:    flh fa5, %lo(.LCPI19_0)(a0)
+; CHECKIZFH-NEXT:    li a0, 25
+; CHECKIZFH-NEXT:    slli a0, a0, 10
+; CHECKIZFH-NEXT:    fmv.h.x fa5, a0
 ; CHECKIZFH-NEXT:    fabs.h fa4, fa0
 ; CHECKIZFH-NEXT:    flt.h a0, fa4, fa5
 ; CHECKIZFH-NEXT:    beqz a0, .LBB19_2
@@ -2323,8 +2326,8 @@ define half @trunc_f16(half %a) nounwind {
 ;
 ; CHECKIZHINX-LABEL: trunc_f16:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI19_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI19_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB19_2
@@ -2400,8 +2403,9 @@ declare half @llvm.rint.f16(half)
 define half @rint_f16(half %a) nounwind {
 ; CHECKIZFH-LABEL: rint_f16:
 ; CHECKIZFH:       # %bb.0:
-; CHECKIZFH-NEXT:    lui a0, %hi(.LCPI20_0)
-; CHECKIZFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a0)
+; CHECKIZFH-NEXT:    li a0, 25
+; CHECKIZFH-NEXT:    slli a0, a0, 10
+; CHECKIZFH-NEXT:    fmv.h.x fa5, a0
 ; CHECKIZFH-NEXT:    fabs.h fa4, fa0
 ; CHECKIZFH-NEXT:    flt.h a0, fa4, fa5
 ; CHECKIZFH-NEXT:    beqz a0, .LBB20_2
@@ -2414,8 +2418,8 @@ define half @rint_f16(half %a) nounwind {
 ;
 ; CHECKIZHINX-LABEL: rint_f16:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI20_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI20_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB20_2
@@ -2611,8 +2615,9 @@ declare half @llvm.round.f16(half)
 define half @round_f16(half %a) nounwind {
 ; CHECKIZFH-LABEL: round_f16:
 ; CHECKIZFH:       # %bb.0:
-; CHECKIZFH-NEXT:    lui a0, %hi(.LCPI22_0)
-; CHECKIZFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
+; CHECKIZFH-NEXT:    li a0, 25
+; CHECKIZFH-NEXT:    slli a0, a0, 10
+; CHECKIZFH-NEXT:    fmv.h.x fa5, a0
 ; CHECKIZFH-NEXT:    fabs.h fa4, fa0
 ; CHECKIZFH-NEXT:    flt.h a0, fa4, fa5
 ; CHECKIZFH-NEXT:    beqz a0, .LBB22_2
@@ -2625,8 +2630,8 @@ define half @round_f16(half %a) nounwind {
 ;
 ; CHECKIZHINX-LABEL: round_f16:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI22_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB22_2
@@ -2702,8 +2707,9 @@ declare half @llvm.roundeven.f16(half)
 define half @roundeven_f16(half %a) nounwind {
 ; CHECKIZFH-LABEL: roundeven_f16:
 ; CHECKIZFH:       # %bb.0:
-; CHECKIZFH-NEXT:    lui a0, %hi(.LCPI23_0)
-; CHECKIZFH-NEXT:    flh fa5, %lo(.LCPI23_0)(a0)
+; CHECKIZFH-NEXT:    li a0, 25
+; CHECKIZFH-NEXT:    slli a0, a0, 10
+; CHECKIZFH-NEXT:    fmv.h.x fa5, a0
 ; CHECKIZFH-NEXT:    fabs.h fa4, fa0
 ; CHECKIZFH-NEXT:    flt.h a0, fa4, fa5
 ; CHECKIZFH-NEXT:    beqz a0, .LBB23_2
@@ -2716,8 +2722,8 @@ define half @roundeven_f16(half %a) nounwind {
 ;
 ; CHECKIZHINX-LABEL: roundeven_f16:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI23_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB23_2
diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
index e7215f07c22045f..88258d7c420970a 100644
--- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
@@ -28,8 +28,8 @@ define signext i32 @test_floor_si32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_floor_si32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI0_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI0_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB0_2
@@ -95,8 +95,9 @@ define signext i32 @test_floor_si32(half %x) {
 define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_floor_si64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI1_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB1_2
@@ -121,8 +122,9 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZFH-NEXT:  # %bb.3:
 ; RV32IZFH-NEXT:    mv a2, a1
 ; RV32IZFH-NEXT:  .LBB1_4:
-; RV32IZFH-NEXT:    lui a1, %hi(.LCPI1_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI1_1)(a1)
+; RV32IZFH-NEXT:    lui a1, 389120
+; RV32IZFH-NEXT:    addi a1, a1, -1
+; RV32IZFH-NEXT:    fmv.w.x fa5, a1
 ; RV32IZFH-NEXT:    flt.s a3, fa5, fs0
 ; RV32IZFH-NEXT:    beqz a3, .LBB1_6
 ; RV32IZFH-NEXT:  # %bb.5:
@@ -153,8 +155,8 @@ define i64 @test_floor_si64(half %x) nounwind {
 ;
 ; RV32IZHINX-LABEL: test_floor_si64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI1_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB1_2
@@ -174,9 +176,9 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    neg s2, s1
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixsfdi@plt
-; RV32IZHINX-NEXT:    lui a2, %hi(.LCPI1_1)
-; RV32IZHINX-NEXT:    lw a2, %lo(.LCPI1_1)(a2)
 ; RV32IZHINX-NEXT:    and a0, s2, a0
+; RV32IZHINX-NEXT:    lui a2, 389120
+; RV32IZHINX-NEXT:    addi a2, a2, -1
 ; RV32IZHINX-NEXT:    flt.s a4, a2, s0
 ; RV32IZHINX-NEXT:    neg a2, a4
 ; RV32IZHINX-NEXT:    or a0, a2, a0
@@ -203,8 +205,8 @@ define i64 @test_floor_si64(half %x) nounwind {
 ;
 ; RV64IZHINX-LABEL: test_floor_si64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI1_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB1_2
@@ -250,8 +252,9 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:  # %bb.3:
 ; RV32IZFHMIN-NEXT:    mv a2, a1
 ; RV32IZFHMIN-NEXT:  .LBB1_4:
-; RV32IZFHMIN-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI1_0)(a1)
+; RV32IZFHMIN-NEXT:    lui a1, 389120
+; RV32IZFHMIN-NEXT:    addi a1, a1, -1
+; RV32IZFHMIN-NEXT:    fmv.w.x fa5, a1
 ; RV32IZFHMIN-NEXT:    flt.s a3, fa5, fs0
 ; RV32IZFHMIN-NEXT:    beqz a3, .LBB1_6
 ; RV32IZFHMIN-NEXT:  # %bb.5:
@@ -317,9 +320,9 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    neg s2, s1
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixsfdi@plt
-; RV32IZHINXMIN-NEXT:    lui a2, %hi(.LCPI1_0)
-; RV32IZHINXMIN-NEXT:    lw a2, %lo(.LCPI1_0)(a2)
 ; RV32IZHINXMIN-NEXT:    and a0, s2, a0
+; RV32IZHINXMIN-NEXT:    lui a2, 389120
+; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
 ; RV32IZHINXMIN-NEXT:    flt.s a4, a2, s0
 ; RV32IZHINXMIN-NEXT:    neg a2, a4
 ; RV32IZHINXMIN-NEXT:    or a0, a2, a0
@@ -381,8 +384,8 @@ define signext i32 @test_floor_ui32(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_floor_ui32:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI2_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI2_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB2_2
@@ -400,8 +403,8 @@ define signext i32 @test_floor_ui32(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_floor_ui32:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI2_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI2_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB2_2
@@ -510,8 +513,9 @@ define signext i32 @test_floor_ui32(half %x) {
 define i64 @test_floor_ui64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_floor_ui64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB3_2
@@ -530,9 +534,10 @@ define i64 @test_floor_ui64(half %x) nounwind {
 ; RV32IZFH-NEXT:    neg s0, a0
 ; RV32IZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IZFH-NEXT:    call __fixunssfdi@plt
-; RV32IZFH-NEXT:    lui a2, %hi(.LCPI3_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI3_1)(a2)
 ; RV32IZFH-NEXT:    and a0, s0, a0
+; RV32IZFH-NEXT:    lui a2, 391168
+; RV32IZFH-NEXT:    addi a2, a2, -1
+; RV32IZFH-NEXT:    fmv.w.x fa5, a2
 ; RV32IZFH-NEXT:    flt.s a2, fa5, fs0
 ; RV32IZFH-NEXT:    neg a2, a2
 ; RV32IZFH-NEXT:    or a0, a2, a0
@@ -555,8 +560,8 @@ define i64 @test_floor_ui64(half %x) nounwind {
 ;
 ; RV32IZHINX-LABEL: test_floor_ui64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI3_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI3_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB3_2
@@ -574,9 +579,9 @@ define i64 @test_floor_ui64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    neg s1, a0
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixunssfdi@plt
-; RV32IZHINX-NEXT:    lui a2, %hi(.LCPI3_1)
-; RV32IZHINX-NEXT:    lw a2, %lo(.LCPI3_1)(a2)
 ; RV32IZHINX-NEXT:    and a0, s1, a0
+; RV32IZHINX-NEXT:    lui a2, 391168
+; RV32IZHINX-NEXT:    addi a2, a2, -1
 ; RV32IZHINX-NEXT:    flt.s a2, a2, s0
 ; RV32IZHINX-NEXT:    neg a2, a2
 ; RV32IZHINX-NEXT:    or a0, a2, a0
@@ -590,8 +595,8 @@ define i64 @test_floor_ui64(half %x) nounwind {
 ;
 ; RV64IZHINX-LABEL: test_floor_ui64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI3_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI3_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB3_2
@@ -631,9 +636,10 @@ define i64 @test_floor_ui64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:    neg s0, a0
 ; RV32IZFHMIN-NEXT:    fmv.s fa0, fs0
 ; RV32IZFHMIN-NEXT:    call __fixunssfdi@plt
-; RV32IZFHMIN-NEXT:    lui a2, %hi(.LCPI3_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI3_0)(a2)
 ; RV32IZFHMIN-NEXT:    and a0, s0, a0
+; RV32IZFHMIN-NEXT:    lui a2, 391168
+; RV32IZFHMIN-NEXT:    addi a2, a2, -1
+; RV32IZFHMIN-NEXT:    fmv.w.x fa5, a2
 ; RV32IZFHMIN-NEXT:    flt.s a2, fa5, fs0
 ; RV32IZFHMIN-NEXT:    neg a2, a2
 ; RV32IZFHMIN-NEXT:    or a0, a2, a0
@@ -689,9 +695,9 @@ define i64 @test_floor_ui64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    neg s1, a0
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixunssfdi@plt
-; RV32IZHINXMIN-NEXT:    lui a2, %hi(.LCPI3_0)
-; RV32IZHINXMIN-NEXT:    lw a2, %lo(.LCPI3_0)(a2)
 ; RV32IZHINXMIN-NEXT:    and a0, s1, a0
+; RV32IZHINXMIN-NEXT:    lui a2, 391168
+; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
 ; RV32IZHINXMIN-NEXT:    flt.s a2, a2, s0
 ; RV32IZHINXMIN-NEXT:    neg a2, a2
 ; RV32IZHINXMIN-NEXT:    or a0, a2, a0
@@ -740,8 +746,8 @@ define signext i32 @test_ceil_si32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_ceil_si32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI4_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI4_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB4_2
@@ -807,8 +813,9 @@ define signext i32 @test_ceil_si32(half %x) {
 define i64 @test_ceil_si64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_ceil_si64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI5_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB5_2
@@ -833,8 +840,9 @@ define i64 @test_ceil_si64(half %x) nounwind {
 ; RV32IZFH-NEXT:  # %bb.3:
 ; RV32IZFH-NEXT:    mv a2, a1
 ; RV32IZFH-NEXT:  .LBB5_4:
-; RV32IZFH-NEXT:    lui a1, %hi(.LCPI5_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI5_1)(a1)
+; RV32IZFH-NEXT:    lui a1, 389120
+; RV32IZFH-NEXT:    addi a1, a1, -1
+; RV32IZFH-NEXT:    fmv.w.x fa5, a1
 ; RV32IZFH-NEXT:    flt.s a3, fa5, fs0
 ; RV32IZFH-NEXT:    beqz a3, .LBB5_6
 ; RV32IZFH-NEXT:  # %bb.5:
@@ -865,8 +873,8 @@ define i64 @test_ceil_si64(half %x) nounwind {
 ;
 ; RV32IZHINX-LABEL: test_ceil_si64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI5_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI5_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB5_2
@@ -886,9 +894,9 @@ define i64 @test_ceil_si64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    neg s2, s1
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixsfdi@plt
-; RV32IZHINX-NEXT:    lui a2, %hi(.LCPI5_1)
-; RV32IZHINX-NEXT:    lw a2, %lo(.LCPI5_1)(a2)
 ; RV32IZHINX-NEXT:    and a0, s2, a0
+; RV32IZHINX-NEXT:    lui a2, 389120
+; RV32IZHINX-NEXT:    addi a2, a2, -1
 ; RV32IZHINX-NEXT:    flt.s a4, a2, s0
 ; RV32IZHINX-NEXT:    neg a2, a4
 ; RV32IZHINX-NEXT:    or a0, a2, a0
@@ -915,8 +923,8 @@ define i64 @test_ceil_si64(half %x) nounwind {
 ;
 ; RV64IZHINX-LABEL: test_ceil_si64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI5_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI5_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB5_2
@@ -962,8 +970,9 @@ define i64 @test_ceil_si64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:  # %bb.3:
 ; RV32IZFHMIN-NEXT:    mv a2, a1
 ; RV32IZFHMIN-NEXT:  .LBB5_4:
-; RV32IZFHMIN-NEXT:    lui a1, %hi(.LCPI5_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI5_0)(a1)
+; RV32IZFHMIN-NEXT:    lui a1, 389120
+; RV32IZFHMIN-NEXT:    addi a1, a1, -1
+; RV32IZFHMIN-NEXT:    fmv.w.x fa5, a1
 ; RV32IZFHMIN-NEXT:    flt.s a3, fa5, fs0
 ; RV32IZFHMIN-NEXT:    beqz a3, .LBB5_6
 ; RV32IZFHMIN-NEXT:  # %bb.5:
@@ -1029,9 +1038,9 @@ define i64 @test_ceil_si64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    neg s2, s1
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixsfdi@plt
-; RV32IZHINXMIN-NEXT:    lui a2, %hi(.LCPI5_0)
-; RV32IZHINXMIN-NEXT:    lw a2, %lo(.LCPI5_0)(a2)
 ; RV32IZHINXMIN-NEXT:    and a0, s2, a0
+; RV32IZHINXMIN-NEXT:    lui a2, 389120
+; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
 ; RV32IZHINXMIN-NEXT:    flt.s a4, a2, s0
 ; RV32IZHINXMIN-NEXT:    neg a2, a4
 ; RV32IZHINXMIN-NEXT:    or a0, a2, a0
@@ -1093,8 +1102,8 @@ define signext i32 @test_ceil_ui32(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_ceil_ui32:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI6_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI6_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB6_2
@@ -1112,8 +1121,8 @@ define signext i32 @test_ceil_ui32(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_ceil_ui32:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI6_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI6_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB6_2
@@ -1222,8 +1231,9 @@ define signext i32 @test_ceil_ui32(half %x) {
 define i64 @test_ceil_ui64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_ceil_ui64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI7_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB7_2
@@ -1242,9 +1252,10 @@ define i64 @test_ceil_ui64(half %x) nounwind {
 ; RV32IZFH-NEXT:    neg s0, a0
 ; RV32IZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IZFH-NEXT:    call __fixunssfdi@plt
-; RV32IZFH-NEXT:    lui a2, %hi(.LCPI7_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI7_1)(a2)
 ; RV32IZFH-NEXT:    and a0, s0, a0
+; RV32IZFH-NEXT:    lui a2, 391168
+; RV32IZFH-NEXT:    addi a2, a2, -1
+; RV32IZFH-NEXT:    fmv.w.x fa5, a2
 ; RV32IZFH-NEXT:    flt.s a2, fa5, fs0
 ; RV32IZFH-NEXT:    neg a2, a2
 ; RV32IZFH-NEXT:    or a0, a2, a0
@@ -1267,8 +1278,8 @@ define i64 @test_ceil_ui64(half %x) nounwind {
 ;
 ; RV32IZHINX-LABEL: test_ceil_ui64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI7_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI7_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB7_2
@@ -1286,9 +1297,9 @@ define i64 @test_ceil_ui64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    neg s1, a0
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixunssfdi@plt
-; RV32IZHINX-NEXT:    lui a2, %hi(.LCPI7_1)
-; RV32IZHINX-NEXT:    lw a2, %lo(.LCPI7_1)(a2)
 ; RV32IZHINX-NEXT:    and a0, s1, a0
+; RV32IZHINX-NEXT:    lui a2, 391168
+; RV32IZHINX-NEXT:    addi a2, a2, -1
 ; RV32IZHINX-NEXT:    flt.s a2, a2, s0
 ; RV32IZHINX-NEXT:    neg a2, a2
 ; RV32IZHINX-NEXT:    or a0, a2, a0
@@ -1302,8 +1313,8 @@ define i64 @test_ceil_ui64(half %x) nounwind {
 ;
 ; RV64IZHINX-LABEL: test_ceil_ui64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI7_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI7_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB7_2
@@ -1343,9 +1354,10 @@ define i64 @test_ceil_ui64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:    neg s0, a0
 ; RV32IZFHMIN-NEXT:    fmv.s fa0, fs0
 ; RV32IZFHMIN-NEXT:    call __fixunssfdi@plt
-; RV32IZFHMIN-NEXT:    lui a2, %hi(.LCPI7_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI7_0)(a2)
 ; RV32IZFHMIN-NEXT:    and a0, s0, a0
+; RV32IZFHMIN-NEXT:    lui a2, 391168
+; RV32IZFHMIN-NEXT:    addi a2, a2, -1
+; RV32IZFHMIN-NEXT:    fmv.w.x fa5, a2
 ; RV32IZFHMIN-NEXT:    flt.s a2, fa5, fs0
 ; RV32IZFHMIN-NEXT:    neg a2, a2
 ; RV32IZFHMIN-NEXT:    or a0, a2, a0
@@ -1401,9 +1413,9 @@ define i64 @test_ceil_ui64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    neg s1, a0
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixunssfdi@plt
-; RV32IZHINXMIN-NEXT:    lui a2, %hi(.LCPI7_0)
-; RV32IZHINXMIN-NEXT:    lw a2, %lo(.LCPI7_0)(a2)
 ; RV32IZHINXMIN-NEXT:    and a0, s1, a0
+; RV32IZHINXMIN-NEXT:    lui a2, 391168
+; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
 ; RV32IZHINXMIN-NEXT:    flt.s a2, a2, s0
 ; RV32IZHINXMIN-NEXT:    neg a2, a2
 ; RV32IZHINXMIN-NEXT:    or a0, a2, a0
@@ -1452,8 +1464,8 @@ define signext i32 @test_trunc_si32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_trunc_si32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI8_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI8_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB8_2
@@ -1519,8 +1531,9 @@ define signext i32 @test_trunc_si32(half %x) {
 define i64 @test_trunc_si64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_trunc_si64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI9_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI9_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB9_2
@@ -1545,8 +1558,9 @@ define i64 @test_trunc_si64(half %x) nounwind {
 ; RV32IZFH-NEXT:  # %bb.3:
 ; RV32IZFH-NEXT:    mv a2, a1
 ; RV32IZFH-NEXT:  .LBB9_4:
-; RV32IZFH-NEXT:    lui a1, %hi(.LCPI9_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI9_1)(a1)
+; RV32IZFH-NEXT:    lui a1, 389120
+; RV32IZFH-NEXT:    addi a1, a1, -1
+; RV32IZFH-NEXT:    fmv.w.x fa5, a1
 ; RV32IZFH-NEXT:    flt.s a3, fa5, fs0
 ; RV32IZFH-NEXT:    beqz a3, .LBB9_6
 ; RV32IZFH-NEXT:  # %bb.5:
@@ -1577,8 +1591,8 @@ define i64 @test_trunc_si64(half %x) nounwind {
 ;
 ; RV32IZHINX-LABEL: test_trunc_si64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI9_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI9_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB9_2
@@ -1598,9 +1612,9 @@ define i64 @test_trunc_si64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    neg s2, s1
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixsfdi@plt
-; RV32IZHINX-NEXT:    lui a2, %hi(.LCPI9_1)
-; RV32IZHINX-NEXT:    lw a2, %lo(.LCPI9_1)(a2)
 ; RV32IZHINX-NEXT:    and a0, s2, a0
+; RV32IZHINX-NEXT:    lui a2, 389120
+; RV32IZHINX-NEXT:    addi a2, a2, -1
 ; RV32IZHINX-NEXT:    flt.s a4, a2, s0
 ; RV32IZHINX-NEXT:    neg a2, a4
 ; RV32IZHINX-NEXT:    or a0, a2, a0
@@ -1627,8 +1641,8 @@ define i64 @test_trunc_si64(half %x) nounwind {
 ;
 ; RV64IZHINX-LABEL: test_trunc_si64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI9_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI9_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB9_2
@@ -1674,8 +1688,9 @@ define i64 @test_trunc_si64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:  # %bb.3:
 ; RV32IZFHMIN-NEXT:    mv a2, a1
 ; RV32IZFHMIN-NEXT:  .LBB9_4:
-; RV32IZFHMIN-NEXT:    lui a1, %hi(.LCPI9_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI9_0)(a1)
+; RV32IZFHMIN-NEXT:    lui a1, 389120
+; RV32IZFHMIN-NEXT:    addi a1, a1, -1
+; RV32IZFHMIN-NEXT:    fmv.w.x fa5, a1
 ; RV32IZFHMIN-NEXT:    flt.s a3, fa5, fs0
 ; RV32IZFHMIN-NEXT:    beqz a3, .LBB9_6
 ; RV32IZFHMIN-NEXT:  # %bb.5:
@@ -1741,9 +1756,9 @@ define i64 @test_trunc_si64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    neg s2, s1
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixsfdi@plt
-; RV32IZHINXMIN-NEXT:    lui a2, %hi(.LCPI9_0)
-; RV32IZHINXMIN-NEXT:    lw a2, %lo(.LCPI9_0)(a2)
 ; RV32IZHINXMIN-NEXT:    and a0, s2, a0
+; RV32IZHINXMIN-NEXT:    lui a2, 389120
+; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
 ; RV32IZHINXMIN-NEXT:    flt.s a4, a2, s0
 ; RV32IZHINXMIN-NEXT:    neg a2, a4
 ; RV32IZHINXMIN-NEXT:    or a0, a2, a0
@@ -1805,8 +1820,8 @@ define signext i32 @test_trunc_ui32(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_trunc_ui32:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI10_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB10_2
@@ -1824,8 +1839,8 @@ define signext i32 @test_trunc_ui32(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_trunc_ui32:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI10_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB10_2
@@ -1934,8 +1949,9 @@ define signext i32 @test_trunc_ui32(half %x) {
 define i64 @test_trunc_ui64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_trunc_ui64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI11_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI11_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB11_2
@@ -1954,9 +1970,10 @@ define i64 @test_trunc_ui64(half %x) nounwind {
 ; RV32IZFH-NEXT:    neg s0, a0
 ; RV32IZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IZFH-NEXT:    call __fixunssfdi@plt
-; RV32IZFH-NEXT:    lui a2, %hi(.LCPI11_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI11_1)(a2)
 ; RV32IZFH-NEXT:    and a0, s0, a0
+; RV32IZFH-NEXT:    lui a2, 391168
+; RV32IZFH-NEXT:    addi a2, a2, -1
+; RV32IZFH-NEXT:    fmv.w.x fa5, a2
 ; RV32IZFH-NEXT:    flt.s a2, fa5, fs0
 ; RV32IZFH-NEXT:    neg a2, a2
 ; RV32IZFH-NEXT:    or a0, a2, a0
@@ -1979,8 +1996,8 @@ define i64 @test_trunc_ui64(half %x) nounwind {
 ;
 ; RV32IZHINX-LABEL: test_trunc_ui64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI11_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI11_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB11_2
@@ -1998,9 +2015,9 @@ define i64 @test_trunc_ui64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    neg s1, a0
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixunssfdi@plt
-; RV32IZHINX-NEXT:    lui a2, %hi(.LCPI11_1)
-; RV32IZHINX-NEXT:    lw a2, %lo(.LCPI11_1)(a2)
 ; RV32IZHINX-NEXT:    and a0, s1, a0
+; RV32IZHINX-NEXT:    lui a2, 391168
+; RV32IZHINX-NEXT:    addi a2, a2, -1
 ; RV32IZHINX-NEXT:    flt.s a2, a2, s0
 ; RV32IZHINX-NEXT:    neg a2, a2
 ; RV32IZHINX-NEXT:    or a0, a2, a0
@@ -2014,8 +2031,8 @@ define i64 @test_trunc_ui64(half %x) nounwind {
 ;
 ; RV64IZHINX-LABEL: test_trunc_ui64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI11_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI11_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB11_2
@@ -2055,9 +2072,10 @@ define i64 @test_trunc_ui64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:    neg s0, a0
 ; RV32IZFHMIN-NEXT:    fmv.s fa0, fs0
 ; RV32IZFHMIN-NEXT:    call __fixunssfdi@plt
-; RV32IZFHMIN-NEXT:    lui a2, %hi(.LCPI11_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI11_0)(a2)
 ; RV32IZFHMIN-NEXT:    and a0, s0, a0
+; RV32IZFHMIN-NEXT:    lui a2, 391168
+; RV32IZFHMIN-NEXT:    addi a2, a2, -1
+; RV32IZFHMIN-NEXT:    fmv.w.x fa5, a2
 ; RV32IZFHMIN-NEXT:    flt.s a2, fa5, fs0
 ; RV32IZFHMIN-NEXT:    neg a2, a2
 ; RV32IZFHMIN-NEXT:    or a0, a2, a0
@@ -2113,9 +2131,9 @@ define i64 @test_trunc_ui64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    neg s1, a0
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixunssfdi@plt
-; RV32IZHINXMIN-NEXT:    lui a2, %hi(.LCPI11_0)
-; RV32IZHINXMIN-NEXT:    lw a2, %lo(.LCPI11_0)(a2)
 ; RV32IZHINXMIN-NEXT:    and a0, s1, a0
+; RV32IZHINXMIN-NEXT:    lui a2, 391168
+; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
 ; RV32IZHINXMIN-NEXT:    flt.s a2, a2, s0
 ; RV32IZHINXMIN-NEXT:    neg a2, a2
 ; RV32IZHINXMIN-NEXT:    or a0, a2, a0
@@ -2164,8 +2182,8 @@ define signext i32 @test_round_si32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_round_si32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI12_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI12_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB12_2
@@ -2231,8 +2249,9 @@ define signext i32 @test_round_si32(half %x) {
 define i64 @test_round_si64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_round_si64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI13_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI13_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB13_2
@@ -2257,8 +2276,9 @@ define i64 @test_round_si64(half %x) nounwind {
 ; RV32IZFH-NEXT:  # %bb.3:
 ; RV32IZFH-NEXT:    mv a2, a1
 ; RV32IZFH-NEXT:  .LBB13_4:
-; RV32IZFH-NEXT:    lui a1, %hi(.LCPI13_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI13_1)(a1)
+; RV32IZFH-NEXT:    lui a1, 389120
+; RV32IZFH-NEXT:    addi a1, a1, -1
+; RV32IZFH-NEXT:    fmv.w.x fa5, a1
 ; RV32IZFH-NEXT:    flt.s a3, fa5, fs0
 ; RV32IZFH-NEXT:    beqz a3, .LBB13_6
 ; RV32IZFH-NEXT:  # %bb.5:
@@ -2289,8 +2309,8 @@ define i64 @test_round_si64(half %x) nounwind {
 ;
 ; RV32IZHINX-LABEL: test_round_si64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI13_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI13_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB13_2
@@ -2310,9 +2330,9 @@ define i64 @test_round_si64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    neg s2, s1
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixsfdi@plt
-; RV32IZHINX-NEXT:    lui a2, %hi(.LCPI13_1)
-; RV32IZHINX-NEXT:    lw a2, %lo(.LCPI13_1)(a2)
 ; RV32IZHINX-NEXT:    and a0, s2, a0
+; RV32IZHINX-NEXT:    lui a2, 389120
+; RV32IZHINX-NEXT:    addi a2, a2, -1
 ; RV32IZHINX-NEXT:    flt.s a4, a2, s0
 ; RV32IZHINX-NEXT:    neg a2, a4
 ; RV32IZHINX-NEXT:    or a0, a2, a0
@@ -2339,8 +2359,8 @@ define i64 @test_round_si64(half %x) nounwind {
 ;
 ; RV64IZHINX-LABEL: test_round_si64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI13_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI13_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB13_2
@@ -2386,8 +2406,9 @@ define i64 @test_round_si64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:  # %bb.3:
 ; RV32IZFHMIN-NEXT:    mv a2, a1
 ; RV32IZFHMIN-NEXT:  .LBB13_4:
-; RV32IZFHMIN-NEXT:    lui a1, %hi(.LCPI13_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI13_0)(a1)
+; RV32IZFHMIN-NEXT:    lui a1, 389120
+; RV32IZFHMIN-NEXT:    addi a1, a1, -1
+; RV32IZFHMIN-NEXT:    fmv.w.x fa5, a1
 ; RV32IZFHMIN-NEXT:    flt.s a3, fa5, fs0
 ; RV32IZFHMIN-NEXT:    beqz a3, .LBB13_6
 ; RV32IZFHMIN-NEXT:  # %bb.5:
@@ -2453,9 +2474,9 @@ define i64 @test_round_si64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    neg s2, s1
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixsfdi@plt
-; RV32IZHINXMIN-NEXT:    lui a2, %hi(.LCPI13_0)
-; RV32IZHINXMIN-NEXT:    lw a2, %lo(.LCPI13_0)(a2)
 ; RV32IZHINXMIN-NEXT:    and a0, s2, a0
+; RV32IZHINXMIN-NEXT:    lui a2, 389120
+; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
 ; RV32IZHINXMIN-NEXT:    flt.s a4, a2, s0
 ; RV32IZHINXMIN-NEXT:    neg a2, a4
 ; RV32IZHINXMIN-NEXT:    or a0, a2, a0
@@ -2517,8 +2538,8 @@ define signext i32 @test_round_ui32(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_round_ui32:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI14_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI14_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB14_2
@@ -2536,8 +2557,8 @@ define signext i32 @test_round_ui32(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_round_ui32:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI14_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI14_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB14_2
@@ -2646,8 +2667,9 @@ define signext i32 @test_round_ui32(half %x) {
 define i64 @test_round_ui64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_round_ui64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI15_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI15_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB15_2
@@ -2666,9 +2688,10 @@ define i64 @test_round_ui64(half %x) nounwind {
 ; RV32IZFH-NEXT:    neg s0, a0
 ; RV32IZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IZFH-NEXT:    call __fixunssfdi@plt
-; RV32IZFH-NEXT:    lui a2, %hi(.LCPI15_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI15_1)(a2)
 ; RV32IZFH-NEXT:    and a0, s0, a0
+; RV32IZFH-NEXT:    lui a2, 391168
+; RV32IZFH-NEXT:    addi a2, a2, -1
+; RV32IZFH-NEXT:    fmv.w.x fa5, a2
 ; RV32IZFH-NEXT:    flt.s a2, fa5, fs0
 ; RV32IZFH-NEXT:    neg a2, a2
 ; RV32IZFH-NEXT:    or a0, a2, a0
@@ -2691,8 +2714,8 @@ define i64 @test_round_ui64(half %x) nounwind {
 ;
 ; RV32IZHINX-LABEL: test_round_ui64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI15_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI15_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB15_2
@@ -2710,9 +2733,9 @@ define i64 @test_round_ui64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    neg s1, a0
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixunssfdi@plt
-; RV32IZHINX-NEXT:    lui a2, %hi(.LCPI15_1)
-; RV32IZHINX-NEXT:    lw a2, %lo(.LCPI15_1)(a2)
 ; RV32IZHINX-NEXT:    and a0, s1, a0
+; RV32IZHINX-NEXT:    lui a2, 391168
+; RV32IZHINX-NEXT:    addi a2, a2, -1
 ; RV32IZHINX-NEXT:    flt.s a2, a2, s0
 ; RV32IZHINX-NEXT:    neg a2, a2
 ; RV32IZHINX-NEXT:    or a0, a2, a0
@@ -2726,8 +2749,8 @@ define i64 @test_round_ui64(half %x) nounwind {
 ;
 ; RV64IZHINX-LABEL: test_round_ui64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI15_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI15_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB15_2
@@ -2767,9 +2790,10 @@ define i64 @test_round_ui64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:    neg s0, a0
 ; RV32IZFHMIN-NEXT:    fmv.s fa0, fs0
 ; RV32IZFHMIN-NEXT:    call __fixunssfdi@plt
-; RV32IZFHMIN-NEXT:    lui a2, %hi(.LCPI15_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI15_0)(a2)
 ; RV32IZFHMIN-NEXT:    and a0, s0, a0
+; RV32IZFHMIN-NEXT:    lui a2, 391168
+; RV32IZFHMIN-NEXT:    addi a2, a2, -1
+; RV32IZFHMIN-NEXT:    fmv.w.x fa5, a2
 ; RV32IZFHMIN-NEXT:    flt.s a2, fa5, fs0
 ; RV32IZFHMIN-NEXT:    neg a2, a2
 ; RV32IZFHMIN-NEXT:    or a0, a2, a0
@@ -2825,9 +2849,9 @@ define i64 @test_round_ui64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    neg s1, a0
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixunssfdi@plt
-; RV32IZHINXMIN-NEXT:    lui a2, %hi(.LCPI15_0)
-; RV32IZHINXMIN-NEXT:    lw a2, %lo(.LCPI15_0)(a2)
 ; RV32IZHINXMIN-NEXT:    and a0, s1, a0
+; RV32IZHINXMIN-NEXT:    lui a2, 391168
+; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
 ; RV32IZHINXMIN-NEXT:    flt.s a2, a2, s0
 ; RV32IZHINXMIN-NEXT:    neg a2, a2
 ; RV32IZHINXMIN-NEXT:    or a0, a2, a0
@@ -2876,8 +2900,8 @@ define signext i32 @test_roundeven_si32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_roundeven_si32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI16_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI16_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB16_2
@@ -2943,8 +2967,9 @@ define signext i32 @test_roundeven_si32(half %x) {
 define i64 @test_roundeven_si64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_roundeven_si64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI17_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI17_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB17_2
@@ -2969,8 +2994,9 @@ define i64 @test_roundeven_si64(half %x) nounwind {
 ; RV32IZFH-NEXT:  # %bb.3:
 ; RV32IZFH-NEXT:    mv a2, a1
 ; RV32IZFH-NEXT:  .LBB17_4:
-; RV32IZFH-NEXT:    lui a1, %hi(.LCPI17_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI17_1)(a1)
+; RV32IZFH-NEXT:    lui a1, 389120
+; RV32IZFH-NEXT:    addi a1, a1, -1
+; RV32IZFH-NEXT:    fmv.w.x fa5, a1
 ; RV32IZFH-NEXT:    flt.s a3, fa5, fs0
 ; RV32IZFH-NEXT:    beqz a3, .LBB17_6
 ; RV32IZFH-NEXT:  # %bb.5:
@@ -3001,8 +3027,8 @@ define i64 @test_roundeven_si64(half %x) nounwind {
 ;
 ; RV32IZHINX-LABEL: test_roundeven_si64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI17_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI17_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB17_2
@@ -3022,9 +3048,9 @@ define i64 @test_roundeven_si64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    neg s2, s1
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixsfdi@plt
-; RV32IZHINX-NEXT:    lui a2, %hi(.LCPI17_1)
-; RV32IZHINX-NEXT:    lw a2, %lo(.LCPI17_1)(a2)
 ; RV32IZHINX-NEXT:    and a0, s2, a0
+; RV32IZHINX-NEXT:    lui a2, 389120
+; RV32IZHINX-NEXT:    addi a2, a2, -1
 ; RV32IZHINX-NEXT:    flt.s a4, a2, s0
 ; RV32IZHINX-NEXT:    neg a2, a4
 ; RV32IZHINX-NEXT:    or a0, a2, a0
@@ -3051,8 +3077,8 @@ define i64 @test_roundeven_si64(half %x) nounwind {
 ;
 ; RV64IZHINX-LABEL: test_roundeven_si64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI17_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI17_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB17_2
@@ -3098,8 +3124,9 @@ define i64 @test_roundeven_si64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:  # %bb.3:
 ; RV32IZFHMIN-NEXT:    mv a2, a1
 ; RV32IZFHMIN-NEXT:  .LBB17_4:
-; RV32IZFHMIN-NEXT:    lui a1, %hi(.LCPI17_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI17_0)(a1)
+; RV32IZFHMIN-NEXT:    lui a1, 389120
+; RV32IZFHMIN-NEXT:    addi a1, a1, -1
+; RV32IZFHMIN-NEXT:    fmv.w.x fa5, a1
 ; RV32IZFHMIN-NEXT:    flt.s a3, fa5, fs0
 ; RV32IZFHMIN-NEXT:    beqz a3, .LBB17_6
 ; RV32IZFHMIN-NEXT:  # %bb.5:
@@ -3165,9 +3192,9 @@ define i64 @test_roundeven_si64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    neg s2, s1
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixsfdi@plt
-; RV32IZHINXMIN-NEXT:    lui a2, %hi(.LCPI17_0)
-; RV32IZHINXMIN-NEXT:    lw a2, %lo(.LCPI17_0)(a2)
 ; RV32IZHINXMIN-NEXT:    and a0, s2, a0
+; RV32IZHINXMIN-NEXT:    lui a2, 389120
+; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
 ; RV32IZHINXMIN-NEXT:    flt.s a4, a2, s0
 ; RV32IZHINXMIN-NEXT:    neg a2, a4
 ; RV32IZHINXMIN-NEXT:    or a0, a2, a0
@@ -3229,8 +3256,8 @@ define signext i32 @test_roundeven_ui32(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_roundeven_ui32:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI18_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI18_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB18_2
@@ -3248,8 +3275,8 @@ define signext i32 @test_roundeven_ui32(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_roundeven_ui32:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI18_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI18_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB18_2
@@ -3358,8 +3385,9 @@ define signext i32 @test_roundeven_ui32(half %x) {
 define i64 @test_roundeven_ui64(half %x) nounwind {
 ; RV32IZFH-LABEL: test_roundeven_ui64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI19_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI19_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB19_2
@@ -3378,9 +3406,10 @@ define i64 @test_roundeven_ui64(half %x) nounwind {
 ; RV32IZFH-NEXT:    neg s0, a0
 ; RV32IZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IZFH-NEXT:    call __fixunssfdi@plt
-; RV32IZFH-NEXT:    lui a2, %hi(.LCPI19_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI19_1)(a2)
 ; RV32IZFH-NEXT:    and a0, s0, a0
+; RV32IZFH-NEXT:    lui a2, 391168
+; RV32IZFH-NEXT:    addi a2, a2, -1
+; RV32IZFH-NEXT:    fmv.w.x fa5, a2
 ; RV32IZFH-NEXT:    flt.s a2, fa5, fs0
 ; RV32IZFH-NEXT:    neg a2, a2
 ; RV32IZFH-NEXT:    or a0, a2, a0
@@ -3403,8 +3432,8 @@ define i64 @test_roundeven_ui64(half %x) nounwind {
 ;
 ; RV32IZHINX-LABEL: test_roundeven_ui64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI19_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI19_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB19_2
@@ -3422,9 +3451,9 @@ define i64 @test_roundeven_ui64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    neg s1, a0
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixunssfdi@plt
-; RV32IZHINX-NEXT:    lui a2, %hi(.LCPI19_1)
-; RV32IZHINX-NEXT:    lw a2, %lo(.LCPI19_1)(a2)
 ; RV32IZHINX-NEXT:    and a0, s1, a0
+; RV32IZHINX-NEXT:    lui a2, 391168
+; RV32IZHINX-NEXT:    addi a2, a2, -1
 ; RV32IZHINX-NEXT:    flt.s a2, a2, s0
 ; RV32IZHINX-NEXT:    neg a2, a2
 ; RV32IZHINX-NEXT:    or a0, a2, a0
@@ -3438,8 +3467,8 @@ define i64 @test_roundeven_ui64(half %x) nounwind {
 ;
 ; RV64IZHINX-LABEL: test_roundeven_ui64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI19_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI19_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB19_2
@@ -3479,9 +3508,10 @@ define i64 @test_roundeven_ui64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:    neg s0, a0
 ; RV32IZFHMIN-NEXT:    fmv.s fa0, fs0
 ; RV32IZFHMIN-NEXT:    call __fixunssfdi@plt
-; RV32IZFHMIN-NEXT:    lui a2, %hi(.LCPI19_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI19_0)(a2)
 ; RV32IZFHMIN-NEXT:    and a0, s0, a0
+; RV32IZFHMIN-NEXT:    lui a2, 391168
+; RV32IZFHMIN-NEXT:    addi a2, a2, -1
+; RV32IZFHMIN-NEXT:    fmv.w.x fa5, a2
 ; RV32IZFHMIN-NEXT:    flt.s a2, fa5, fs0
 ; RV32IZFHMIN-NEXT:    neg a2, a2
 ; RV32IZFHMIN-NEXT:    or a0, a2, a0
@@ -3537,9 +3567,9 @@ define i64 @test_roundeven_ui64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    neg s1, a0
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixunssfdi@plt
-; RV32IZHINXMIN-NEXT:    lui a2, %hi(.LCPI19_0)
-; RV32IZHINXMIN-NEXT:    lw a2, %lo(.LCPI19_0)(a2)
 ; RV32IZHINXMIN-NEXT:    and a0, s1, a0
+; RV32IZHINXMIN-NEXT:    lui a2, 391168
+; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
 ; RV32IZHINXMIN-NEXT:    flt.s a2, a2, s0
 ; RV32IZHINXMIN-NEXT:    neg a2, a2
 ; RV32IZHINXMIN-NEXT:    or a0, a2, a0
diff --git a/llvm/test/CodeGen/RISCV/half-round-conv.ll b/llvm/test/CodeGen/RISCV/half-round-conv.ll
index 84ba49684fc6434..cb4758917ae0428 100644
--- a/llvm/test/CodeGen/RISCV/half-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/half-round-conv.ll
@@ -29,8 +29,8 @@ define signext i8 @test_floor_si8(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_floor_si8:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI0_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI0_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB0_2
@@ -44,8 +44,8 @@ define signext i8 @test_floor_si8(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_floor_si8:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI0_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI0_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB0_2
@@ -144,8 +144,8 @@ define signext i16 @test_floor_si16(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_floor_si16:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI1_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB1_2
@@ -159,8 +159,8 @@ define signext i16 @test_floor_si16(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_floor_si16:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI1_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB1_2
@@ -254,8 +254,8 @@ define signext i32 @test_floor_si32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_floor_si32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI2_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI2_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB2_2
@@ -309,8 +309,9 @@ define signext i32 @test_floor_si32(half %x) {
 define i64 @test_floor_si64(half %x) {
 ; RV32IZFH-LABEL: test_floor_si64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB3_2
@@ -335,8 +336,8 @@ define i64 @test_floor_si64(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_floor_si64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI3_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI3_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB3_2
@@ -356,8 +357,8 @@ define i64 @test_floor_si64(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_floor_si64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI3_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI3_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB3_2
@@ -466,8 +467,8 @@ define zeroext i8 @test_floor_ui8(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_floor_ui8:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI4_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI4_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB4_2
@@ -481,8 +482,8 @@ define zeroext i8 @test_floor_ui8(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_floor_ui8:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI4_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI4_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB4_2
@@ -581,8 +582,8 @@ define zeroext i16 @test_floor_ui16(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_floor_ui16:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI5_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI5_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB5_2
@@ -596,8 +597,8 @@ define zeroext i16 @test_floor_ui16(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_floor_ui16:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI5_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI5_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB5_2
@@ -691,8 +692,8 @@ define signext i32 @test_floor_ui32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_floor_ui32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI6_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI6_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB6_2
@@ -746,8 +747,9 @@ define signext i32 @test_floor_ui32(half %x) {
 define i64 @test_floor_ui64(half %x) {
 ; RV32IZFH-LABEL: test_floor_ui64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI7_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB7_2
@@ -772,8 +774,8 @@ define i64 @test_floor_ui64(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_floor_ui64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI7_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI7_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB7_2
@@ -793,8 +795,8 @@ define i64 @test_floor_ui64(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_floor_ui64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI7_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI7_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB7_2
@@ -903,8 +905,8 @@ define signext i8 @test_ceil_si8(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_ceil_si8:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI8_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI8_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB8_2
@@ -918,8 +920,8 @@ define signext i8 @test_ceil_si8(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_ceil_si8:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI8_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI8_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB8_2
@@ -1018,8 +1020,8 @@ define signext i16 @test_ceil_si16(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_ceil_si16:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI9_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI9_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB9_2
@@ -1033,8 +1035,8 @@ define signext i16 @test_ceil_si16(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_ceil_si16:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI9_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI9_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB9_2
@@ -1128,8 +1130,8 @@ define signext i32 @test_ceil_si32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_ceil_si32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI10_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI10_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB10_2
@@ -1183,8 +1185,9 @@ define signext i32 @test_ceil_si32(half %x) {
 define i64 @test_ceil_si64(half %x) {
 ; RV32IZFH-LABEL: test_ceil_si64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI11_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI11_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB11_2
@@ -1209,8 +1212,8 @@ define i64 @test_ceil_si64(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_ceil_si64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI11_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI11_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB11_2
@@ -1230,8 +1233,8 @@ define i64 @test_ceil_si64(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_ceil_si64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI11_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI11_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB11_2
@@ -1340,8 +1343,8 @@ define zeroext i8 @test_ceil_ui8(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_ceil_ui8:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI12_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI12_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB12_2
@@ -1355,8 +1358,8 @@ define zeroext i8 @test_ceil_ui8(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_ceil_ui8:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI12_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI12_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB12_2
@@ -1455,8 +1458,8 @@ define zeroext i16 @test_ceil_ui16(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_ceil_ui16:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI13_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI13_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB13_2
@@ -1470,8 +1473,8 @@ define zeroext i16 @test_ceil_ui16(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_ceil_ui16:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI13_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI13_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB13_2
@@ -1565,8 +1568,8 @@ define signext i32 @test_ceil_ui32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_ceil_ui32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI14_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI14_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB14_2
@@ -1620,8 +1623,9 @@ define signext i32 @test_ceil_ui32(half %x) {
 define i64 @test_ceil_ui64(half %x) {
 ; RV32IZFH-LABEL: test_ceil_ui64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI15_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI15_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB15_2
@@ -1646,8 +1650,8 @@ define i64 @test_ceil_ui64(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_ceil_ui64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI15_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI15_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB15_2
@@ -1667,8 +1671,8 @@ define i64 @test_ceil_ui64(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_ceil_ui64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI15_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI15_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB15_2
@@ -1777,8 +1781,8 @@ define signext i8 @test_trunc_si8(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_trunc_si8:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI16_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI16_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB16_2
@@ -1792,8 +1796,8 @@ define signext i8 @test_trunc_si8(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_trunc_si8:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI16_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI16_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB16_2
@@ -1892,8 +1896,8 @@ define signext i16 @test_trunc_si16(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_trunc_si16:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI17_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI17_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB17_2
@@ -1907,8 +1911,8 @@ define signext i16 @test_trunc_si16(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_trunc_si16:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI17_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI17_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB17_2
@@ -2002,8 +2006,8 @@ define signext i32 @test_trunc_si32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_trunc_si32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI18_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI18_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB18_2
@@ -2057,8 +2061,9 @@ define signext i32 @test_trunc_si32(half %x) {
 define i64 @test_trunc_si64(half %x) {
 ; RV32IZFH-LABEL: test_trunc_si64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI19_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI19_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB19_2
@@ -2083,8 +2088,8 @@ define i64 @test_trunc_si64(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_trunc_si64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI19_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI19_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB19_2
@@ -2104,8 +2109,8 @@ define i64 @test_trunc_si64(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_trunc_si64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI19_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI19_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB19_2
@@ -2214,8 +2219,8 @@ define zeroext i8 @test_trunc_ui8(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_trunc_ui8:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI20_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI20_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB20_2
@@ -2229,8 +2234,8 @@ define zeroext i8 @test_trunc_ui8(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_trunc_ui8:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI20_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI20_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB20_2
@@ -2329,8 +2334,8 @@ define zeroext i16 @test_trunc_ui16(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_trunc_ui16:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI21_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI21_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB21_2
@@ -2344,8 +2349,8 @@ define zeroext i16 @test_trunc_ui16(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_trunc_ui16:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI21_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI21_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB21_2
@@ -2439,8 +2444,8 @@ define signext i32 @test_trunc_ui32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_trunc_ui32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI22_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB22_2
@@ -2494,8 +2499,9 @@ define signext i32 @test_trunc_ui32(half %x) {
 define i64 @test_trunc_ui64(half %x) {
 ; RV32IZFH-LABEL: test_trunc_ui64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI23_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB23_2
@@ -2520,8 +2526,8 @@ define i64 @test_trunc_ui64(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_trunc_ui64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI23_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI23_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB23_2
@@ -2541,8 +2547,8 @@ define i64 @test_trunc_ui64(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_trunc_ui64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI23_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI23_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB23_2
@@ -2651,8 +2657,8 @@ define signext i8 @test_round_si8(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_round_si8:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI24_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI24_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB24_2
@@ -2666,8 +2672,8 @@ define signext i8 @test_round_si8(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_round_si8:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI24_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI24_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB24_2
@@ -2766,8 +2772,8 @@ define signext i16 @test_round_si16(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_round_si16:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI25_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI25_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB25_2
@@ -2781,8 +2787,8 @@ define signext i16 @test_round_si16(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_round_si16:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI25_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI25_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB25_2
@@ -2876,8 +2882,8 @@ define signext i32 @test_round_si32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_round_si32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI26_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI26_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB26_2
@@ -2931,8 +2937,9 @@ define signext i32 @test_round_si32(half %x) {
 define i64 @test_round_si64(half %x) {
 ; RV32IZFH-LABEL: test_round_si64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI27_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI27_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB27_2
@@ -2957,8 +2964,8 @@ define i64 @test_round_si64(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_round_si64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI27_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI27_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB27_2
@@ -2978,8 +2985,8 @@ define i64 @test_round_si64(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_round_si64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI27_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI27_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB27_2
@@ -3088,8 +3095,8 @@ define zeroext i8 @test_round_ui8(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_round_ui8:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI28_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI28_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB28_2
@@ -3103,8 +3110,8 @@ define zeroext i8 @test_round_ui8(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_round_ui8:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI28_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI28_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB28_2
@@ -3203,8 +3210,8 @@ define zeroext i16 @test_round_ui16(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_round_ui16:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI29_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI29_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB29_2
@@ -3218,8 +3225,8 @@ define zeroext i16 @test_round_ui16(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_round_ui16:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI29_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI29_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB29_2
@@ -3313,8 +3320,8 @@ define signext i32 @test_round_ui32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_round_ui32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI30_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI30_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB30_2
@@ -3368,8 +3375,9 @@ define signext i32 @test_round_ui32(half %x) {
 define i64 @test_round_ui64(half %x) {
 ; RV32IZFH-LABEL: test_round_ui64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI31_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI31_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB31_2
@@ -3394,8 +3402,8 @@ define i64 @test_round_ui64(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_round_ui64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI31_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI31_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB31_2
@@ -3415,8 +3423,8 @@ define i64 @test_round_ui64(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_round_ui64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI31_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI31_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB31_2
@@ -3525,8 +3533,8 @@ define signext i8 @test_roundeven_si8(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_roundeven_si8:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI32_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI32_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB32_2
@@ -3540,8 +3548,8 @@ define signext i8 @test_roundeven_si8(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_roundeven_si8:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI32_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI32_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB32_2
@@ -3640,8 +3648,8 @@ define signext i16 @test_roundeven_si16(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_roundeven_si16:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI33_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI33_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB33_2
@@ -3655,8 +3663,8 @@ define signext i16 @test_roundeven_si16(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_roundeven_si16:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI33_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI33_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB33_2
@@ -3750,8 +3758,8 @@ define signext i32 @test_roundeven_si32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_roundeven_si32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI34_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI34_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB34_2
@@ -3805,8 +3813,9 @@ define signext i32 @test_roundeven_si32(half %x) {
 define i64 @test_roundeven_si64(half %x) {
 ; RV32IZFH-LABEL: test_roundeven_si64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI35_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI35_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB35_2
@@ -3831,8 +3840,8 @@ define i64 @test_roundeven_si64(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_roundeven_si64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI35_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI35_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB35_2
@@ -3852,8 +3861,8 @@ define i64 @test_roundeven_si64(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_roundeven_si64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI35_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI35_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB35_2
@@ -3962,8 +3971,8 @@ define zeroext i8 @test_roundeven_ui8(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_roundeven_ui8:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI36_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI36_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB36_2
@@ -3977,8 +3986,8 @@ define zeroext i8 @test_roundeven_ui8(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_roundeven_ui8:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI36_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI36_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB36_2
@@ -4077,8 +4086,8 @@ define zeroext i16 @test_roundeven_ui16(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_roundeven_ui16:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI37_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI37_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB37_2
@@ -4092,8 +4101,8 @@ define zeroext i16 @test_roundeven_ui16(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_roundeven_ui16:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI37_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI37_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB37_2
@@ -4187,8 +4196,8 @@ define signext i32 @test_roundeven_ui32(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_roundeven_ui32:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI38_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI38_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB38_2
@@ -4242,8 +4251,9 @@ define signext i32 @test_roundeven_ui32(half %x) {
 define i64 @test_roundeven_ui64(half %x) {
 ; RV32IZFH-LABEL: test_roundeven_ui64:
 ; RV32IZFH:       # %bb.0:
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI39_0)
-; RV32IZFH-NEXT:    flh fa5, %lo(.LCPI39_0)(a0)
+; RV32IZFH-NEXT:    li a0, 25
+; RV32IZFH-NEXT:    slli a0, a0, 10
+; RV32IZFH-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFH-NEXT:    fabs.h fa4, fa0
 ; RV32IZFH-NEXT:    flt.h a0, fa4, fa5
 ; RV32IZFH-NEXT:    beqz a0, .LBB39_2
@@ -4268,8 +4278,8 @@ define i64 @test_roundeven_ui64(half %x) {
 ;
 ; RV32IZHINX-LABEL: test_roundeven_ui64:
 ; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a1, %hi(.LCPI39_0)
-; RV32IZHINX-NEXT:    lh a1, %lo(.LCPI39_0)(a1)
+; RV32IZHINX-NEXT:    li a1, 25
+; RV32IZHINX-NEXT:    slli a1, a1, 10
 ; RV32IZHINX-NEXT:    fabs.h a2, a0
 ; RV32IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV32IZHINX-NEXT:    beqz a1, .LBB39_2
@@ -4289,8 +4299,8 @@ define i64 @test_roundeven_ui64(half %x) {
 ;
 ; RV64IZHINX-LABEL: test_roundeven_ui64:
 ; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a1, %hi(.LCPI39_0)
-; RV64IZHINX-NEXT:    lh a1, %lo(.LCPI39_0)(a1)
+; RV64IZHINX-NEXT:    li a1, 25
+; RV64IZHINX-NEXT:    slli a1, a1, 10
 ; RV64IZHINX-NEXT:    fabs.h a2, a0
 ; RV64IZHINX-NEXT:    flt.h a1, a2, a1
 ; RV64IZHINX-NEXT:    beqz a1, .LBB39_2
@@ -4410,8 +4420,9 @@ define half @test_floor_half(half %x) {
 ; RV64IFD-NEXT:    ret
 ; CHECKIZFH-LABEL: test_floor_half:
 ; CHECKIZFH:       # %bb.0:
-; CHECKIZFH-NEXT:    lui a0, %hi(.LCPI40_0)
-; CHECKIZFH-NEXT:    flh fa5, %lo(.LCPI40_0)(a0)
+; CHECKIZFH-NEXT:    li a0, 25
+; CHECKIZFH-NEXT:    slli a0, a0, 10
+; CHECKIZFH-NEXT:    fmv.h.x fa5, a0
 ; CHECKIZFH-NEXT:    fabs.h fa4, fa0
 ; CHECKIZFH-NEXT:    flt.h a0, fa4, fa5
 ; CHECKIZFH-NEXT:    beqz a0, .LBB40_2
@@ -4424,8 +4435,8 @@ define half @test_floor_half(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_floor_half:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI40_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI40_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB40_2
@@ -4494,8 +4505,9 @@ define half @test_ceil_half(half %x) {
 ; RV64IFD-NEXT:    ret
 ; CHECKIZFH-LABEL: test_ceil_half:
 ; CHECKIZFH:       # %bb.0:
-; CHECKIZFH-NEXT:    lui a0, %hi(.LCPI41_0)
-; CHECKIZFH-NEXT:    flh fa5, %lo(.LCPI41_0)(a0)
+; CHECKIZFH-NEXT:    li a0, 25
+; CHECKIZFH-NEXT:    slli a0, a0, 10
+; CHECKIZFH-NEXT:    fmv.h.x fa5, a0
 ; CHECKIZFH-NEXT:    fabs.h fa4, fa0
 ; CHECKIZFH-NEXT:    flt.h a0, fa4, fa5
 ; CHECKIZFH-NEXT:    beqz a0, .LBB41_2
@@ -4508,8 +4520,8 @@ define half @test_ceil_half(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_ceil_half:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI41_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI41_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB41_2
@@ -4578,8 +4590,9 @@ define half @test_trunc_half(half %x) {
 ; RV64IFD-NEXT:    ret
 ; CHECKIZFH-LABEL: test_trunc_half:
 ; CHECKIZFH:       # %bb.0:
-; CHECKIZFH-NEXT:    lui a0, %hi(.LCPI42_0)
-; CHECKIZFH-NEXT:    flh fa5, %lo(.LCPI42_0)(a0)
+; CHECKIZFH-NEXT:    li a0, 25
+; CHECKIZFH-NEXT:    slli a0, a0, 10
+; CHECKIZFH-NEXT:    fmv.h.x fa5, a0
 ; CHECKIZFH-NEXT:    fabs.h fa4, fa0
 ; CHECKIZFH-NEXT:    flt.h a0, fa4, fa5
 ; CHECKIZFH-NEXT:    beqz a0, .LBB42_2
@@ -4592,8 +4605,8 @@ define half @test_trunc_half(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_trunc_half:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI42_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI42_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB42_2
@@ -4662,8 +4675,9 @@ define half @test_round_half(half %x) {
 ; RV64IFD-NEXT:    ret
 ; CHECKIZFH-LABEL: test_round_half:
 ; CHECKIZFH:       # %bb.0:
-; CHECKIZFH-NEXT:    lui a0, %hi(.LCPI43_0)
-; CHECKIZFH-NEXT:    flh fa5, %lo(.LCPI43_0)(a0)
+; CHECKIZFH-NEXT:    li a0, 25
+; CHECKIZFH-NEXT:    slli a0, a0, 10
+; CHECKIZFH-NEXT:    fmv.h.x fa5, a0
 ; CHECKIZFH-NEXT:    fabs.h fa4, fa0
 ; CHECKIZFH-NEXT:    flt.h a0, fa4, fa5
 ; CHECKIZFH-NEXT:    beqz a0, .LBB43_2
@@ -4676,8 +4690,8 @@ define half @test_round_half(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_round_half:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI43_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI43_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB43_2
@@ -4746,8 +4760,9 @@ define half @test_roundeven_half(half %x) {
 ; RV64IFD-NEXT:    ret
 ; CHECKIZFH-LABEL: test_roundeven_half:
 ; CHECKIZFH:       # %bb.0:
-; CHECKIZFH-NEXT:    lui a0, %hi(.LCPI44_0)
-; CHECKIZFH-NEXT:    flh fa5, %lo(.LCPI44_0)(a0)
+; CHECKIZFH-NEXT:    li a0, 25
+; CHECKIZFH-NEXT:    slli a0, a0, 10
+; CHECKIZFH-NEXT:    fmv.h.x fa5, a0
 ; CHECKIZFH-NEXT:    fabs.h fa4, fa0
 ; CHECKIZFH-NEXT:    flt.h a0, fa4, fa5
 ; CHECKIZFH-NEXT:    beqz a0, .LBB44_2
@@ -4760,8 +4775,8 @@ define half @test_roundeven_half(half %x) {
 ;
 ; CHECKIZHINX-LABEL: test_roundeven_half:
 ; CHECKIZHINX:       # %bb.0:
-; CHECKIZHINX-NEXT:    lui a1, %hi(.LCPI44_0)
-; CHECKIZHINX-NEXT:    lh a1, %lo(.LCPI44_0)(a1)
+; CHECKIZHINX-NEXT:    li a1, 25
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
 ; CHECKIZHINX-NEXT:    fabs.h a2, a0
 ; CHECKIZHINX-NEXT:    flt.h a1, a2, a1
 ; CHECKIZHINX-NEXT:    beqz a1, .LBB44_2
diff --git a/llvm/test/CodeGen/RISCV/half-zfa-fli.ll b/llvm/test/CodeGen/RISCV/half-zfa-fli.ll
index 2805a8e582b34b9..6880c0281992efd 100644
--- a/llvm/test/CodeGen/RISCV/half-zfa-fli.ll
+++ b/llvm/test/CodeGen/RISCV/half-zfa-fli.ll
@@ -16,8 +16,9 @@ define half @loadfpimm1() {
 ;
 ; ZFHMIN-LABEL: loadfpimm1:
 ; ZFHMIN:       # %bb.0:
-; ZFHMIN-NEXT:    lui a0, %hi(.LCPI0_0)
-; ZFHMIN-NEXT:    flh fa0, %lo(.LCPI0_0)(a0)
+; ZFHMIN-NEXT:    li a0, 11
+; ZFHMIN-NEXT:    slli a0, a0, 10
+; ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFHMIN-NEXT:    ret
   ret half 0.0625
 }
@@ -30,8 +31,9 @@ define half @loadfpimm2() {
 ;
 ; ZFHMIN-LABEL: loadfpimm2:
 ; ZFHMIN:       # %bb.0:
-; ZFHMIN-NEXT:    lui a0, %hi(.LCPI1_0)
-; ZFHMIN-NEXT:    flh fa0, %lo(.LCPI1_0)(a0)
+; ZFHMIN-NEXT:    li a0, 29
+; ZFHMIN-NEXT:    slli a0, a0, 9
+; ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFHMIN-NEXT:    ret
   ret half 0.75
 }
@@ -44,8 +46,9 @@ define half @loadfpimm3() {
 ;
 ; ZFHMIN-LABEL: loadfpimm3:
 ; ZFHMIN:       # %bb.0:
-; ZFHMIN-NEXT:    lui a0, %hi(.LCPI2_0)
-; ZFHMIN-NEXT:    flh fa0, %lo(.LCPI2_0)(a0)
+; ZFHMIN-NEXT:    lui a0, 4
+; ZFHMIN-NEXT:    addi a0, a0, -768
+; ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFHMIN-NEXT:    ret
   ret half 1.25
 }
@@ -58,8 +61,9 @@ define half @loadfpimm4() {
 ;
 ; ZFHMIN-LABEL: loadfpimm4:
 ; ZFHMIN:       # %bb.0:
-; ZFHMIN-NEXT:    lui a0, %hi(.LCPI3_0)
-; ZFHMIN-NEXT:    flh fa0, %lo(.LCPI3_0)(a0)
+; ZFHMIN-NEXT:    lui a0, 4
+; ZFHMIN-NEXT:    addi a0, a0, 512
+; ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFHMIN-NEXT:    ret
   ret half 3.0
 }
@@ -72,8 +76,9 @@ define half @loadfpimm5() {
 ;
 ; ZFHMIN-LABEL: loadfpimm5:
 ; ZFHMIN:       # %bb.0:
-; ZFHMIN-NEXT:    lui a0, %hi(.LCPI4_0)
-; ZFHMIN-NEXT:    flh fa0, %lo(.LCPI4_0)(a0)
+; ZFHMIN-NEXT:    li a0, 23
+; ZFHMIN-NEXT:    slli a0, a0, 10
+; ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFHMIN-NEXT:    ret
   ret half 256.0
 }
@@ -86,8 +91,9 @@ define half @loadfpimm6() {
 ;
 ; ZFHMIN-LABEL: loadfpimm6:
 ; ZFHMIN:       # %bb.0:
-; ZFHMIN-NEXT:    lui a0, %hi(.LCPI5_0)
-; ZFHMIN-NEXT:    flh fa0, %lo(.LCPI5_0)(a0)
+; ZFHMIN-NEXT:    li a0, 31
+; ZFHMIN-NEXT:    slli a0, a0, 10
+; ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFHMIN-NEXT:    ret
   ret half 0xH7C00
 }
@@ -100,8 +106,9 @@ define half @loadfpimm7() {
 ;
 ; ZFHMIN-LABEL: loadfpimm7:
 ; ZFHMIN:       # %bb.0:
-; ZFHMIN-NEXT:    lui a0, %hi(.LCPI6_0)
-; ZFHMIN-NEXT:    flh fa0, %lo(.LCPI6_0)(a0)
+; ZFHMIN-NEXT:    lui a0, 8
+; ZFHMIN-NEXT:    addi a0, a0, -512
+; ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFHMIN-NEXT:    ret
   ret half 0xH7E00
 }
@@ -123,14 +130,16 @@ define half @loadfpimm8() {
 define half @loadfpimm9() {
 ; CHECK-LABEL: loadfpimm9:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
-; CHECK-NEXT:    flh fa0, %lo(.LCPI8_0)(a0)
+; CHECK-NEXT:    lui a0, 6
+; CHECK-NEXT:    addi a0, a0, -1032
+; CHECK-NEXT:    fmv.h.x fa0, a0
 ; CHECK-NEXT:    ret
 ;
 ; ZFHMIN-LABEL: loadfpimm9:
 ; ZFHMIN:       # %bb.0:
-; ZFHMIN-NEXT:    lui a0, %hi(.LCPI8_0)
-; ZFHMIN-NEXT:    flh fa0, %lo(.LCPI8_0)(a0)
+; ZFHMIN-NEXT:    lui a0, 6
+; ZFHMIN-NEXT:    addi a0, a0, -1032
+; ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFHMIN-NEXT:    ret
   ret half 255.0
 }
@@ -169,14 +178,16 @@ define half @loadfpimm11() {
 define half @loadfpimm12() {
 ; CHECK-LABEL: loadfpimm12:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    flh fa0, %lo(.LCPI11_0)(a0)
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addi a0, a0, -1023
+; CHECK-NEXT:    fmv.h.x fa0, a0
 ; CHECK-NEXT:    ret
 ;
 ; ZFHMIN-LABEL: loadfpimm12:
 ; ZFHMIN:       # %bb.0:
-; ZFHMIN-NEXT:    lui a0, %hi(.LCPI11_0)
-; ZFHMIN-NEXT:    flh fa0, %lo(.LCPI11_0)(a0)
+; ZFHMIN-NEXT:    lui a0, 8
+; ZFHMIN-NEXT:    addi a0, a0, -1023
+; ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFHMIN-NEXT:    ret
   ret half 0xH7c01
 }
@@ -189,8 +200,9 @@ define half @loadfpimm13() {
 ;
 ; ZFHMIN-LABEL: loadfpimm13:
 ; ZFHMIN:       # %bb.0:
-; ZFHMIN-NEXT:    lui a0, %hi(.LCPI12_0)
-; ZFHMIN-NEXT:    flh fa0, %lo(.LCPI12_0)(a0)
+; ZFHMIN-NEXT:    li a0, -17
+; ZFHMIN-NEXT:    slli a0, a0, 10
+; ZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; ZFHMIN-NEXT:    ret
   ret half -1.0
 }
diff --git a/llvm/test/CodeGen/RISCV/repeated-fp-divisors.ll b/llvm/test/CodeGen/RISCV/repeated-fp-divisors.ll
index f183c936fc6721c..f3b4319ccc4fa82 100644
--- a/llvm/test/CodeGen/RISCV/repeated-fp-divisors.ll
+++ b/llvm/test/CodeGen/RISCV/repeated-fp-divisors.ll
@@ -17,8 +17,9 @@ entry:
 define void @two_fdivs(double %a0, double %a1, double %a2, ptr %res) {
 ; CHECK-LABEL: two_fdivs:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI1_0)(a1)
+; CHECK-NEXT:    li a1, 1023
+; CHECK-NEXT:    slli a1, a1, 52
+; CHECK-NEXT:    fmv.d.x fa5, a1
 ; CHECK-NEXT:    fdiv.d fa5, fa5, fa0
 ; CHECK-NEXT:    fmul.d fa4, fa1, fa5
 ; CHECK-NEXT:    fmul.d fa5, fa2, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
index 7ce167f8929736b..03326447c3e0764 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
@@ -1,18 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s
 
 declare <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x half> @vp_ceil_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv1f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
@@ -30,10 +31,11 @@ define <vscale x 1 x half> @vp_ceil_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
 define <vscale x 1 x half> @vp_ceil_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv1f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -53,10 +55,11 @@ declare <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half>, <vscale x
 define <vscale x 2 x half> @vp_ceil_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
@@ -74,10 +77,11 @@ define <vscale x 2 x half> @vp_ceil_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
 define <vscale x 2 x half> @vp_ceil_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv2f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +101,11 @@ declare <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half>, <vscale x
 define <vscale x 4 x half> @vp_ceil_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
@@ -118,10 +123,11 @@ define <vscale x 4 x half> @vp_ceil_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
 define <vscale x 4 x half> @vp_ceil_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv4f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -142,10 +148,11 @@ define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: vp_ceil_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI6_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
@@ -164,10 +171,11 @@ define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 define <vscale x 8 x half> @vp_ceil_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv8f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI7_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -188,10 +196,11 @@ define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: vp_ceil_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI8_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
@@ -210,10 +219,11 @@ define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 define <vscale x 16 x half> @vp_ceil_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv16f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI9_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI9_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -234,10 +244,11 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: vp_ceil_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI10_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
@@ -256,10 +267,11 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 define <vscale x 32 x half> @vp_ceil_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv32f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI11_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI11_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -503,41 +515,75 @@ define <vscale x 16 x float> @vp_ceil_vv_nxv16f32_unmasked(<vscale x 16 x float>
 declare <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x double> @vp_ceil_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_vv_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_ceil_vv_nxv1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI22_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vfabs.v v9, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_ceil_vv_nxv1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vfabs.v v9, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x double> %v
 }
 
 define <vscale x 1 x double> @vp_ceil_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_vv_nxv1f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_ceil_vv_nxv1f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI23_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_ceil_vv_nxv1f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -547,43 +593,79 @@ define <vscale x 1 x double> @vp_ceil_vv_nxv1f64_unmasked(<vscale x 1 x double>
 declare <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_vv_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_ceil_vv_nxv2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v10, v0
+; RV32-NEXT:    lui a1, %hi(.LCPI24_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vfabs.v v12, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_ceil_vv_nxv2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v10, v0
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-NEXT:    vfabs.v v12, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
 }
 
 define <vscale x 2 x double> @vp_ceil_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_vv_nxv2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_ceil_vv_nxv2f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI25_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_ceil_vv_nxv2f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -593,43 +675,79 @@ define <vscale x 2 x double> @vp_ceil_vv_nxv2f64_unmasked(<vscale x 2 x double>
 declare <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_vv_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_ceil_vv_nxv4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    lui a1, %hi(.LCPI26_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vfabs.v v16, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_ceil_vv_nxv4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV64-NEXT:    vfabs.v v16, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
 }
 
 define <vscale x 4 x double> @vp_ceil_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_vv_nxv4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_ceil_vv_nxv4f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI27_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI27_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_ceil_vv_nxv4f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -639,43 +757,79 @@ define <vscale x 4 x double> @vp_ceil_vv_nxv4f64_unmasked(<vscale x 4 x double>
 declare <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
 
 define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_vv_nxv7f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_ceil_vv_nxv7f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v16, v0
+; RV32-NEXT:    lui a1, %hi(.LCPI28_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v24, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_ceil_vv_nxv7f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v16, v0
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v24, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
 }
 
 define <vscale x 7 x double> @vp_ceil_vv_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_vv_nxv7f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI29_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_ceil_vv_nxv7f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI29_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI29_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v16, v8
+; RV32-NEXT:    vmflt.vf v0, v16, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_ceil_vv_nxv7f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v16, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v16, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
@@ -685,43 +839,79 @@ define <vscale x 7 x double> @vp_ceil_vv_nxv7f64_unmasked(<vscale x 7 x double>
 declare <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_vv_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_ceil_vv_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v16, v0
+; RV32-NEXT:    lui a1, %hi(.LCPI30_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v24, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_ceil_vv_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v16, v0
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v24, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
 }
 
 define <vscale x 8 x double> @vp_ceil_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_vv_nxv8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI31_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_ceil_vv_nxv8f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI31_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI31_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v16, v8
+; RV32-NEXT:    vmflt.vf v0, v16, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_ceil_vv_nxv8f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v16, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v16, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -732,111 +922,215 @@ define <vscale x 8 x double> @vp_ceil_vv_nxv8f64_unmasked(<vscale x 8 x double>
 declare <vscale x 16 x double> @llvm.vp.ceil.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_vv_nxv16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v24, v0
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    srli a2, a1, 3
-; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfabs.v v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
-; CHECK-NEXT:    fsrmi a2, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    bltu a0, a1, .LBB32_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB32_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_ceil_vv_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV32-NEXT:    vmv1r.v v24, v0
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    srli a2, a1, 3
+; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vx v25, v0, a2
+; RV32-NEXT:    sub a2, a0, a1
+; RV32-NEXT:    sltu a3, a0, a2
+; RV32-NEXT:    addi a3, a3, -1
+; RV32-NEXT:    and a2, a3, a2
+; RV32-NEXT:    lui a3, %hi(.LCPI32_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v25
+; RV32-NEXT:    vfabs.v v8, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; RV32-NEXT:    fsrmi a2, 3
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v25
+; RV32-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; RV32-NEXT:    fsrm a2
+; RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    bltu a0, a1, .LBB32_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB32_2:
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v24
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vfabs.v v16, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v24
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_ceil_vv_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT:    vmv1r.v v24, v0
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    srli a2, a1, 3
+; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; RV64-NEXT:    vslidedown.vx v25, v0, a2
+; RV64-NEXT:    sub a2, a0, a1
+; RV64-NEXT:    sltu a3, a0, a2
+; RV64-NEXT:    addi a3, a3, -1
+; RV64-NEXT:    and a2, a3, a2
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v25
+; RV64-NEXT:    vfabs.v v8, v16, v0.t
+; RV64-NEXT:    li a2, 1075
+; RV64-NEXT:    slli a2, a2, 52
+; RV64-NEXT:    fmv.d.x fa5, a2
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; RV64-NEXT:    fsrmi a2, 3
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v25
+; RV64-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; RV64-NEXT:    fsrm a2
+; RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    bltu a0, a1, .LBB32_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB32_2:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vfabs.v v16, v8, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
   %v = call <vscale x 16 x double> @llvm.vp.ceil.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x double> %v
 }
 
 define <vscale x 16 x double> @vp_ceil_vv_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_vv_nxv16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI33_0)(a3)
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a2, 3
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    bltu a0, a1, .LBB33_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB33_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_ceil_vv_nxv16f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    sub a2, a0, a1
+; RV32-NEXT:    lui a3, %hi(.LCPI33_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI33_0)(a3)
+; RV32-NEXT:    sltu a3, a0, a2
+; RV32-NEXT:    addi a3, a3, -1
+; RV32-NEXT:    and a2, a3, a2
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v24, v16
+; RV32-NEXT:    vmflt.vf v0, v24, fa5
+; RV32-NEXT:    fsrmi a2, 3
+; RV32-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; RV32-NEXT:    fsrm a2
+; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; RV32-NEXT:    bltu a0, a1, .LBB33_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB33_2:
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v24, v8
+; RV32-NEXT:    vmflt.vf v0, v24, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_ceil_vv_nxv16f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    sub a2, a0, a1
+; RV64-NEXT:    sltu a3, a0, a2
+; RV64-NEXT:    addi a3, a3, -1
+; RV64-NEXT:    and a2, a3, a2
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v24, v16
+; RV64-NEXT:    li a2, 1075
+; RV64-NEXT:    slli a2, a2, 52
+; RV64-NEXT:    fmv.d.x fa5, a2
+; RV64-NEXT:    vmflt.vf v0, v24, fa5
+; RV64-NEXT:    fsrmi a2, 3
+; RV64-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; RV64-NEXT:    fsrm a2
+; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; RV64-NEXT:    bltu a0, a1, .LBB33_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB33_2:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v24, v8
+; RV64-NEXT:    vmflt.vf v0, v24, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x double> @llvm.vp.ceil.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
index 4fdaacef1105dff..1f7882ef4a49f15 100644
--- a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
@@ -32,10 +32,11 @@ define <vscale x 1 x i8> @trunc_nxv1f64_to_si8(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: trunc_nxv1f64_to_si8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI0_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI0_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -75,10 +76,11 @@ define <vscale x 1 x i8> @trunc_nxv1f64_to_ui8(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: trunc_nxv1f64_to_ui8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI1_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI1_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -116,10 +118,11 @@ define <vscale x 1 x i16> @trunc_nxv1f64_to_si16(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: trunc_nxv1f64_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI2_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI2_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -155,10 +158,11 @@ define <vscale x 1 x i16> @trunc_nxv1f64_to_ui16(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: trunc_nxv1f64_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI3_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -274,10 +278,11 @@ define <vscale x 4 x i8> @trunc_nxv4f64_to_si8(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: trunc_nxv4f64_to_si8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI8_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI8_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -317,10 +322,11 @@ define <vscale x 4 x i8> @trunc_nxv4f64_to_ui8(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: trunc_nxv4f64_to_ui8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI9_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI9_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -358,10 +364,11 @@ define <vscale x 4 x i16> @trunc_nxv4f64_to_si16(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: trunc_nxv4f64_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI10_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI10_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -397,10 +404,11 @@ define <vscale x 4 x i16> @trunc_nxv4f64_to_ui16(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: trunc_nxv4f64_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI11_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -518,10 +526,11 @@ define <vscale x 1 x i8> @ceil_nxv1f64_to_si8(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: ceil_nxv1f64_to_si8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI16_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI16_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -565,10 +574,11 @@ define <vscale x 1 x i8> @ceil_nxv1f64_to_ui8(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: ceil_nxv1f64_to_ui8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI17_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI17_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -610,10 +620,11 @@ define <vscale x 1 x i16> @ceil_nxv1f64_to_si16(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: ceil_nxv1f64_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI18_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -653,10 +664,11 @@ define <vscale x 1 x i16> @ceil_nxv1f64_to_ui16(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: ceil_nxv1f64_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI19_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI19_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -792,10 +804,11 @@ define <vscale x 4 x i8> @ceil_nxv4f64_to_si8(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: ceil_nxv4f64_to_si8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI24_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -839,10 +852,11 @@ define <vscale x 4 x i8> @ceil_nxv4f64_to_ui8(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: ceil_nxv4f64_to_ui8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI25_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI25_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -884,10 +898,11 @@ define <vscale x 4 x i16> @ceil_nxv4f64_to_si16(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: ceil_nxv4f64_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI26_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI26_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -927,10 +942,11 @@ define <vscale x 4 x i16> @ceil_nxv4f64_to_ui16(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: ceil_nxv4f64_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI27_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI27_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -1064,10 +1080,11 @@ define <vscale x 1 x i8> @rint_nxv1f64_to_si8(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv1f64_to_si8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI32_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI32_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -1107,10 +1124,11 @@ define <vscale x 1 x i8> @rint_nxv1f64_to_ui8(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv1f64_to_ui8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI33_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI33_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -1148,10 +1166,11 @@ define <vscale x 1 x i16> @rint_nxv1f64_to_si16(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv1f64_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI34_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI34_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -1187,10 +1206,11 @@ define <vscale x 1 x i16> @rint_nxv1f64_to_ui16(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv1f64_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI35_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI35_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -1225,10 +1245,11 @@ define <vscale x 1 x i32> @rint_nxv1f64_to_si32(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv1f64_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI36_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -1262,10 +1283,11 @@ define <vscale x 1 x i32> @rint_nxv1f64_to_ui32(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv1f64_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI37_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI37_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -1297,10 +1319,11 @@ define <vscale x 1 x i64> @rint_nxv1f64_to_si64(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv1f64_to_si64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI38_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -1330,10 +1353,11 @@ define <vscale x 1 x i64> @rint_nxv1f64_to_ui64(<vscale x 1 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv1f64_to_ui64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI39_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI39_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v9, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -1374,10 +1398,11 @@ define <vscale x 4 x i8> @rint_nxv4f64_to_si8(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv4f64_to_si8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI40_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -1417,10 +1442,11 @@ define <vscale x 4 x i8> @rint_nxv4f64_to_ui8(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv4f64_to_ui8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI41_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI41_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -1458,10 +1484,11 @@ define <vscale x 4 x i16> @rint_nxv4f64_to_si16(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv4f64_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI42_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -1497,10 +1524,11 @@ define <vscale x 4 x i16> @rint_nxv4f64_to_ui16(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv4f64_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI43_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI43_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -1535,10 +1563,11 @@ define <vscale x 4 x i32> @rint_nxv4f64_to_si32(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv4f64_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI44_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI44_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -1572,10 +1601,11 @@ define <vscale x 4 x i32> @rint_nxv4f64_to_ui32(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv4f64_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI45_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI45_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -1607,10 +1637,11 @@ define <vscale x 4 x i64> @rint_nxv4f64_to_si64(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv4f64_to_si64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI46_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI46_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -1640,10 +1671,11 @@ define <vscale x 4 x i64> @rint_nxv4f64_to_ui64(<vscale x 4 x double> %x) {
 ;
 ; RV64-LABEL: rint_nxv4f64_to_ui64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI47_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI47_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vmflt.vf v0, v12, fa5
 ; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll
index 75747a6674b7b4a..a5959c805de3228 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll
@@ -1,18 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s
 
 define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) strictfp {
 ; CHECK-LABEL: ceil_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -31,10 +32,11 @@ define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -53,10 +55,11 @@ define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -75,10 +78,11 @@ define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -97,10 +101,11 @@ define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -119,10 +124,11 @@ define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -247,88 +253,160 @@ define <vscale x 16 x float> @ceil_nxv16f32(<vscale x 16 x float> %x) strictfp {
 declare <vscale x 16 x float> @llvm.experimental.constrained.ceil.nxv16f32(<vscale x 16 x float>, metadata)
 
 define <vscale x 1 x double> @ceil_nxv1f64(<vscale x 1 x double> %x) strictfp {
-; CHECK-LABEL: ceil_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: ceil_nxv1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_nxv1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.experimental.constrained.ceil.nxv1f64(<vscale x 1 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 1 x double> %a
 }
 declare <vscale x 1 x double> @llvm.experimental.constrained.ceil.nxv1f64(<vscale x 1 x double>, metadata)
 
 define <vscale x 2 x double> @ceil_nxv2f64(<vscale x 2 x double> %x) strictfp {
-; CHECK-LABEL: ceil_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: ceil_nxv2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_nxv2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 2 x double> @llvm.experimental.constrained.ceil.nxv2f64(<vscale x 2 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %a
 }
 declare <vscale x 2 x double> @llvm.experimental.constrained.ceil.nxv2f64(<vscale x 2 x double>, metadata)
 
 define <vscale x 4 x double> @ceil_nxv4f64(<vscale x 4 x double> %x) strictfp {
-; CHECK-LABEL: ceil_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: ceil_nxv4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_nxv4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.experimental.constrained.ceil.nxv4f64(<vscale x 4 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %a
 }
 declare <vscale x 4 x double> @llvm.experimental.constrained.ceil.nxv4f64(<vscale x 4 x double>, metadata)
 
 define <vscale x 8 x double> @ceil_nxv8f64(<vscale x 8 x double> %x) strictfp {
-; CHECK-LABEL: ceil_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: ceil_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v16, v8
+; RV32-NEXT:    vmflt.vf v0, v16, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v16, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v16, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 8 x double> @llvm.experimental.constrained.ceil.nxv8f64(<vscale x 8 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
index 9efc3183f15a522..fe9d07080d69afb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
@@ -1,16 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s
 
 define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: ceil_nxv1f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -27,10 +28,11 @@ declare <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half>)
 define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: ceil_nxv2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -47,10 +49,11 @@ declare <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half>)
 define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: ceil_nxv4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -67,10 +70,11 @@ declare <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half>)
 define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) {
 ; CHECK-LABEL: ceil_nxv8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -87,10 +91,11 @@ declare <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half>)
 define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) {
 ; CHECK-LABEL: ceil_nxv16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -107,10 +112,11 @@ declare <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half>)
 define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) {
 ; CHECK-LABEL: ceil_nxv32f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -225,80 +231,144 @@ define <vscale x 16 x float> @ceil_nxv16f32(<vscale x 16 x float> %x) {
 declare <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float>)
 
 define <vscale x 1 x double> @ceil_nxv1f64(<vscale x 1 x double> %x) {
-; CHECK-LABEL: ceil_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: ceil_nxv1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_nxv1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
   ret <vscale x 1 x double> %a
 }
 declare <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double>)
 
 define <vscale x 2 x double> @ceil_nxv2f64(<vscale x 2 x double> %x) {
-; CHECK-LABEL: ceil_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: ceil_nxv2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_nxv2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %x)
   ret <vscale x 2 x double> %a
 }
 declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)
 
 define <vscale x 4 x double> @ceil_nxv4f64(<vscale x 4 x double> %x) {
-; CHECK-LABEL: ceil_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: ceil_nxv4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_nxv4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
   ret <vscale x 4 x double> %a
 }
 declare <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double>)
 
 define <vscale x 8 x double> @ceil_nxv8f64(<vscale x 8 x double> %x) {
-; CHECK-LABEL: ceil_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: ceil_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v16, v8
+; RV32-NEXT:    vmflt.vf v0, v16, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v16, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v16, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> %x)
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll
index 31a945320445742..3c7069d452160b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll
@@ -1,18 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s
 
 define <vscale x 1 x half> @floor_nxv1f16(<vscale x 1 x half> %x) strictfp {
 ; CHECK-LABEL: floor_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -31,10 +32,11 @@ define <vscale x 2 x half> @floor_nxv2f16(<vscale x 2 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -53,10 +55,11 @@ define <vscale x 4 x half> @floor_nxv4f16(<vscale x 4 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -75,10 +78,11 @@ define <vscale x 8 x half> @floor_nxv8f16(<vscale x 8 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -97,10 +101,11 @@ define <vscale x 16 x half> @floor_nxv16f16(<vscale x 16 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -119,10 +124,11 @@ define <vscale x 32 x half> @floor_nxv32f16(<vscale x 32 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -247,88 +253,160 @@ define <vscale x 16 x float> @floor_nxv16f32(<vscale x 16 x float> %x) strictfp
 declare <vscale x 16 x float> @llvm.experimental.constrained.floor.nxv16f32(<vscale x 16 x float>, metadata)
 
 define <vscale x 1 x double> @floor_nxv1f64(<vscale x 1 x double> %x) strictfp {
-; CHECK-LABEL: floor_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: floor_nxv1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: floor_nxv1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.experimental.constrained.floor.nxv1f64(<vscale x 1 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 1 x double> %a
 }
 declare <vscale x 1 x double> @llvm.experimental.constrained.floor.nxv1f64(<vscale x 1 x double>, metadata)
 
 define <vscale x 2 x double> @floor_nxv2f64(<vscale x 2 x double> %x) strictfp {
-; CHECK-LABEL: floor_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: floor_nxv2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: floor_nxv2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 2 x double> @llvm.experimental.constrained.floor.nxv2f64(<vscale x 2 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %a
 }
 declare <vscale x 2 x double> @llvm.experimental.constrained.floor.nxv2f64(<vscale x 2 x double>, metadata)
 
 define <vscale x 4 x double> @floor_nxv4f64(<vscale x 4 x double> %x) strictfp {
-; CHECK-LABEL: floor_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: floor_nxv4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: floor_nxv4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.experimental.constrained.floor.nxv4f64(<vscale x 4 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %a
 }
 declare <vscale x 4 x double> @llvm.experimental.constrained.floor.nxv4f64(<vscale x 4 x double>, metadata)
 
 define <vscale x 8 x double> @floor_nxv8f64(<vscale x 8 x double> %x) strictfp {
-; CHECK-LABEL: floor_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: floor_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v16, v8
+; RV32-NEXT:    vmflt.vf v0, v16, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: floor_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v16, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v16, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 8 x double> @llvm.experimental.constrained.floor.nxv8f64(<vscale x 8 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
index ec60b3ed3e0c88a..febca7d05b7e0d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
@@ -1,16 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s
 
 define <vscale x 1 x half> @floor_nxv1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: floor_nxv1f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -27,10 +28,11 @@ declare <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half>)
 define <vscale x 2 x half> @floor_nxv2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: floor_nxv2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -47,10 +49,11 @@ declare <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half>)
 define <vscale x 4 x half> @floor_nxv4f16(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: floor_nxv4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -67,10 +70,11 @@ declare <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half>)
 define <vscale x 8 x half> @floor_nxv8f16(<vscale x 8 x half> %x) {
 ; CHECK-LABEL: floor_nxv8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -87,10 +91,11 @@ declare <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half>)
 define <vscale x 16 x half> @floor_nxv16f16(<vscale x 16 x half> %x) {
 ; CHECK-LABEL: floor_nxv16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -107,10 +112,11 @@ declare <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half>)
 define <vscale x 32 x half> @floor_nxv32f16(<vscale x 32 x half> %x) {
 ; CHECK-LABEL: floor_nxv32f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -225,80 +231,144 @@ define <vscale x 16 x float> @floor_nxv16f32(<vscale x 16 x float> %x) {
 declare <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float>)
 
 define <vscale x 1 x double> @floor_nxv1f64(<vscale x 1 x double> %x) {
-; CHECK-LABEL: floor_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: floor_nxv1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: floor_nxv1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> %x)
   ret <vscale x 1 x double> %a
 }
 declare <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double>)
 
 define <vscale x 2 x double> @floor_nxv2f64(<vscale x 2 x double> %x) {
-; CHECK-LABEL: floor_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: floor_nxv2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: floor_nxv2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> %x)
   ret <vscale x 2 x double> %a
 }
 declare <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double>)
 
 define <vscale x 4 x double> @floor_nxv4f64(<vscale x 4 x double> %x) {
-; CHECK-LABEL: floor_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: floor_nxv4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: floor_nxv4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> %x)
   ret <vscale x 4 x double> %a
 }
 declare <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double>)
 
 define <vscale x 8 x double> @floor_nxv8f64(<vscale x 8 x double> %x) {
-; CHECK-LABEL: floor_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: floor_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v16, v8
+; RV32-NEXT:    vmflt.vf v0, v16, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: floor_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v16, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v16, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 8 x double> @llvm.floor.nxv8f64(<vscale x 8 x double> %x)
   ret <vscale x 8 x double> %a
 }
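
The new RV64 check lines above verify that the rounding threshold 2^52 (the smallest magnitude at which every double is already an integer) is now materialized with li/slli/fmv.d.x instead of a constant-pool load: 1075 is the biased exponent of 2^52, so shifting it into bits [62:52] produces the IEEE-754 encoding 0x4330000000000000 with a zero mantissa. The RV32 check lines keep the lui/fld pair because fmv.d.x needs a 64-bit integer register. As a quick illustration (not part of the patch), this standalone C++ snippet reproduces the bit pattern that the `li a0, 1075` / `slli a0, a0, 52` sequence builds:

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  int main() {
    uint64_t bits = 1075ULL << 52;    // same value the li/slli pair leaves in a0
    double d;
    std::memcpy(&d, &bits, sizeof d); // what fmv.d.x fa5, a0 does architecturally
    // prints: 0x4330000000000000 = 4.5036e+15, i.e. 2^52
    std::printf("0x%016llx = %g\n", (unsigned long long)bits, d);
    return 0;
  }
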
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
index 84e07e13bf92557..d3406ece71252d1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
@@ -1,22 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH-RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32
 
 declare <2 x half> @llvm.vp.ceil.v2f16(<2 x half>, <2 x i1>, i32)
 
 define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_ceil_v2f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 3
@@ -55,10 +56,11 @@ define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl)
 define <2 x half> @vp_ceil_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_ceil_v2f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 3
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +99,11 @@ declare <4 x half> @llvm.vp.ceil.v4f16(<4 x half>, <4 x i1>, i32)
 define <4 x half> @vp_ceil_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_ceil_v4f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 3
@@ -139,10 +142,11 @@ define <4 x half> @vp_ceil_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl)
 define <4 x half> @vp_ceil_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_ceil_v4f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 3
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -181,10 +185,11 @@ declare <8 x half> @llvm.vp.ceil.v8f16(<8 x half>, <8 x i1>, i32)
 define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_ceil_v8f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 3
@@ -225,10 +230,11 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
 define <8 x half> @vp_ceil_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_ceil_v8f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 3
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -268,10 +274,11 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e
 ; ZVFH-LABEL: vp_ceil_v16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 3
@@ -313,10 +320,11 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e
 define <16 x half> @vp_ceil_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_ceil_v16f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v10, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
 ; ZVFH-NEXT:    fsrmi a0, 3
 ; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -533,41 +541,75 @@ define <16 x float> @vp_ceil_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl)
 declare <2 x double> @llvm.vp.ceil.v2f64(<2 x double>, <2 x i1>, i32)
 
 define <2 x double> @vp_ceil_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI16_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_ceil_v2f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI16_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a0, 3
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_ceil_v2f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a0, 3
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
   %v = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
   ret <2 x double> %v
 }
 
 define <2 x double> @vp_ceil_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_v2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI17_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_ceil_v2f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI17_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v9, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 3
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_ceil_v2f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v9, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 3
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
@@ -577,43 +619,79 @@ define <2 x double> @vp_ceil_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl)
 declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32)
 
 define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_ceil_v4f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    vmv1r.v v10, v0
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI18_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a0, 3
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v10
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_ceil_v4f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vmv1r.v v10, v0
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a0, 3
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v10
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
   %v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
 }
 
 define <4 x double> @vp_ceil_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_v4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI19_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_ceil_v4f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI19_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v10, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 3
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_ceil_v4f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v10, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 3
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
@@ -623,43 +701,79 @@ define <4 x double> @vp_ceil_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl)
 declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32)
 
 define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_ceil_v8f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    vmv1r.v v12, v0
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI20_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a0, 3
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v12
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_ceil_v8f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vmv1r.v v12, v0
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a0, 3
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v12
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
   %v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
 }
 
 define <8 x double> @vp_ceil_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_v8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI21_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_ceil_v8f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI21_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v12, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 3
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_ceil_v8f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v12, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 3
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
@@ -669,43 +783,79 @@ define <8 x double> @vp_ceil_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl)
 declare <15 x double> @llvm.vp.ceil.v15f64(<15 x double>, <15 x i1>, i32)
 
 define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_v15f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_ceil_v15f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    vmv1r.v v16, v0
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a0, 3
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v16
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_ceil_v15f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vmv1r.v v16, v0
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a0, 3
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v16
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
   %v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
 }
 
 define <15 x double> @vp_ceil_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_v15f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_ceil_v15f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI23_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v16, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 3
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_ceil_v15f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v16, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 3
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
   %head = insertelement <15 x i1> poison, i1 true, i32 0
   %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
   %v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
@@ -715,43 +865,79 @@ define <15 x double> @vp_ceil_v15f64_unmasked(<15 x double> %va, i32 zeroext %ev
 declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32)
 
 define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_v16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_ceil_v16f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    vmv1r.v v16, v0
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI24_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a0, 3
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v16
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_ceil_v16f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vmv1r.v v16, v0
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a0, 3
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v16
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
   %v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
 }
 
 define <16 x double> @vp_ceil_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_v16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_ceil_v16f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI25_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v16, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 3
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_ceil_v16f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v16, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 3
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
@@ -761,114 +947,221 @@ define <16 x double> @vp_ceil_v16f64_unmasked(<16 x double> %va, i32 zeroext %ev
 declare <32 x double> @llvm.vp.ceil.v32f64(<32 x double>, <32 x i1>, i32)
 
 define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_v32f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    vslidedown.vi v24, v0, 2
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB26_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a1, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v8, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_ceil_v32f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    addi sp, sp, -16
+; ZVFH-RV32-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-RV32-NEXT:    csrr a1, vlenb
+; ZVFH-RV32-NEXT:    slli a1, a1, 4
+; ZVFH-RV32-NEXT:    sub sp, sp, a1
+; ZVFH-RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFH-RV32-NEXT:    vmv1r.v v25, v0
+; ZVFH-RV32-NEXT:    csrr a1, vlenb
+; ZVFH-RV32-NEXT:    slli a1, a1, 3
+; ZVFH-RV32-NEXT:    add a1, sp, a1
+; ZVFH-RV32-NEXT:    addi a1, a1, 16
+; ZVFH-RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFH-RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; ZVFH-RV32-NEXT:    li a2, 16
+; ZVFH-RV32-NEXT:    vslidedown.vi v24, v0, 2
+; ZVFH-RV32-NEXT:    mv a1, a0
+; ZVFH-RV32-NEXT:    bltu a0, a2, .LBB26_2
+; ZVFH-RV32-NEXT:  # %bb.1:
+; ZVFH-RV32-NEXT:    li a1, 16
+; ZVFH-RV32-NEXT:  .LBB26_2:
+; ZVFH-RV32-NEXT:    lui a2, %hi(.LCPI26_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
+; ZVFH-RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v25
+; ZVFH-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a1, 3
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v25
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a1
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFH-RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVFH-RV32-NEXT:    addi a1, a0, -16
+; ZVFH-RV32-NEXT:    sltu a0, a0, a1
+; ZVFH-RV32-NEXT:    addi a0, a0, -1
+; ZVFH-RV32-NEXT:    and a0, a0, a1
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v24
+; ZVFH-RV32-NEXT:    csrr a0, vlenb
+; ZVFH-RV32-NEXT:    slli a0, a0, 3
+; ZVFH-RV32-NEXT:    add a0, sp, a0
+; ZVFH-RV32-NEXT:    addi a0, a0, 16
+; ZVFH-RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-RV32-NEXT:    vfabs.v v8, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v24, v8, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a0, 3
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v24
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFH-RV32-NEXT:    addi a0, sp, 16
+; ZVFH-RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFH-RV32-NEXT:    csrr a0, vlenb
+; ZVFH-RV32-NEXT:    slli a0, a0, 4
+; ZVFH-RV32-NEXT:    add sp, sp, a0
+; ZVFH-RV32-NEXT:    addi sp, sp, 16
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_ceil_v32f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    addi sp, sp, -16
+; ZVFH-RV64-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-RV64-NEXT:    csrr a1, vlenb
+; ZVFH-RV64-NEXT:    slli a1, a1, 4
+; ZVFH-RV64-NEXT:    sub sp, sp, a1
+; ZVFH-RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFH-RV64-NEXT:    vmv1r.v v25, v0
+; ZVFH-RV64-NEXT:    csrr a1, vlenb
+; ZVFH-RV64-NEXT:    slli a1, a1, 3
+; ZVFH-RV64-NEXT:    add a1, sp, a1
+; ZVFH-RV64-NEXT:    addi a1, a1, 16
+; ZVFH-RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFH-RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; ZVFH-RV64-NEXT:    li a2, 16
+; ZVFH-RV64-NEXT:    vslidedown.vi v24, v0, 2
+; ZVFH-RV64-NEXT:    mv a1, a0
+; ZVFH-RV64-NEXT:    bltu a0, a2, .LBB26_2
+; ZVFH-RV64-NEXT:  # %bb.1:
+; ZVFH-RV64-NEXT:    li a1, 16
+; ZVFH-RV64-NEXT:  .LBB26_2:
+; ZVFH-RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v25
+; ZVFH-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    li a1, 1075
+; ZVFH-RV64-NEXT:    slli a1, a1, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a1, 3
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v25
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a1
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFH-RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVFH-RV64-NEXT:    addi a1, a0, -16
+; ZVFH-RV64-NEXT:    sltu a0, a0, a1
+; ZVFH-RV64-NEXT:    addi a0, a0, -1
+; ZVFH-RV64-NEXT:    and a0, a0, a1
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v24
+; ZVFH-RV64-NEXT:    csrr a0, vlenb
+; ZVFH-RV64-NEXT:    slli a0, a0, 3
+; ZVFH-RV64-NEXT:    add a0, sp, a0
+; ZVFH-RV64-NEXT:    addi a0, a0, 16
+; ZVFH-RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-RV64-NEXT:    vfabs.v v8, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v24, v8, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a0, 3
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v24
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFH-RV64-NEXT:    addi a0, sp, 16
+; ZVFH-RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFH-RV64-NEXT:    csrr a0, vlenb
+; ZVFH-RV64-NEXT:    slli a0, a0, 4
+; ZVFH-RV64-NEXT:    add sp, sp, a0
+; ZVFH-RV64-NEXT:    addi sp, sp, 16
+; ZVFH-RV64-NEXT:    ret
   %v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
   ret <32 x double> %v
 }
 
 define <32 x double> @vp_ceil_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_ceil_v32f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB27_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB27_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a1, 3
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_ceil_v32f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    li a2, 16
+; ZVFH-RV32-NEXT:    mv a1, a0
+; ZVFH-RV32-NEXT:    bltu a0, a2, .LBB27_2
+; ZVFH-RV32-NEXT:  # %bb.1:
+; ZVFH-RV32-NEXT:    li a1, 16
+; ZVFH-RV32-NEXT:  .LBB27_2:
+; ZVFH-RV32-NEXT:    lui a2, %hi(.LCPI27_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
+; ZVFH-RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v24, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFH-RV32-NEXT:    fsrmi a1, 3
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a1
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV32-NEXT:    addi a1, a0, -16
+; ZVFH-RV32-NEXT:    sltu a0, a0, a1
+; ZVFH-RV32-NEXT:    addi a0, a0, -1
+; ZVFH-RV32-NEXT:    and a0, a0, a1
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v24, v16
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 3
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_ceil_v32f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    li a2, 16
+; ZVFH-RV64-NEXT:    mv a1, a0
+; ZVFH-RV64-NEXT:    bltu a0, a2, .LBB27_2
+; ZVFH-RV64-NEXT:  # %bb.1:
+; ZVFH-RV64-NEXT:    li a1, 16
+; ZVFH-RV64-NEXT:  .LBB27_2:
+; ZVFH-RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v24, v8
+; ZVFH-RV64-NEXT:    li a1, 1075
+; ZVFH-RV64-NEXT:    slli a1, a1, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFH-RV64-NEXT:    fsrmi a1, 3
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a1
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV64-NEXT:    addi a1, a0, -16
+; ZVFH-RV64-NEXT:    sltu a0, a0, a1
+; ZVFH-RV64-NEXT:    addi a0, a0, -1
+; ZVFH-RV64-NEXT:    and a0, a0, a1
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v24, v16
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 3
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFH-RV64-NEXT:    ret
   %head = insertelement <32 x i1> poison, i1 true, i32 0
   %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
   %v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
   ret <32 x double> %v
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; ZVFHMIN-RV32: {{.*}}
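
The same idiom applies to the f16 cases above: 25 is the biased exponent of 2^10 (fp16 bias is 15), so 25 << 10 = 0x6400 is the half encoding of 1024.0, the analogous threshold for a type with 10 mantissa bits. Because fmv.h.x only needs the low 16 bits of a GPR, the sequence is usable on both RV32 and RV64, which is why the half checks stay under the shared ZVFH prefix while the double checks split into ZVFH-RV32/ZVFH-RV64. A small standalone sketch, again purely illustrative, decoding that pattern:

  #include <cmath>
  #include <cstdint>
  #include <cstdio>

  int main() {
    uint16_t bits = 25u << 10;                 // 0x6400, built by li a0, 25 / slli a0, a0, 10
    int exp = ((bits >> 10) & 0x1f) - 15;      // fp16 exponent field minus bias
    unsigned frac = bits & 0x3ff;              // zero for this constant
    double value = std::ldexp(1.0 + frac / 1024.0, exp);
    std::printf("0x%04x = %g\n", bits, value); // prints: 0x6400 = 1024, i.e. 2^10
    return 0;
  }
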
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll
index 1e93a73ede5d657..3c314f3d548173d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll
@@ -1,18 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s
 
 define <1 x half> @ceil_v1f16(<1 x half> %x) strictfp {
 ; CHECK-LABEL: ceil_v1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -31,10 +32,11 @@ define <2 x half> @ceil_v2f16(<2 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -53,10 +55,11 @@ define <4 x half> @ceil_v4f16(<4 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -75,10 +78,11 @@ define <8 x half> @ceil_v8f16(<8 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +101,11 @@ define <16 x half> @ceil_v16f16(<16 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -120,10 +125,11 @@ define <32 x half> @ceil_v32f16(<32 x half> %x) strictfp {
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -248,88 +254,160 @@ define <16 x float> @ceil_v16f32(<16 x float> %x) strictfp {
 declare <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float>, metadata)
 
 define <1 x double> @ceil_v1f64(<1 x double> %x) strictfp {
-; CHECK-LABEL: ceil_v1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: ceil_v1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_v1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double> %x, metadata !"fpexcept.strict")
   ret <1 x double> %a
 }
 declare <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double>, metadata)
 
 define <2 x double> @ceil_v2f64(<2 x double> %x) strictfp {
-; CHECK-LABEL: ceil_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: ceil_v2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_v2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double> %x, metadata !"fpexcept.strict")
   ret <2 x double> %a
 }
 declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata)
 
 define <4 x double> @ceil_v4f64(<4 x double> %x) strictfp {
-; CHECK-LABEL: ceil_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: ceil_v4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_v4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double> %x, metadata !"fpexcept.strict")
   ret <4 x double> %a
 }
 declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata)
 
 define <8 x double> @ceil_v8f64(<8 x double> %x) strictfp {
-; CHECK-LABEL: ceil_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: ceil_v8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    fsrmi a0, 3
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ceil_v8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    fsrmi a0, 3
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <8 x double> @llvm.experimental.constrained.ceil.v8f64(<8 x double> %x, metadata !"fpexcept.strict")
   ret <8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll
index 53018939fc6eb44..077b6790c9c10b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll
@@ -1,18 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <1 x half> @floor_v1f16(<1 x half> %x) strictfp {
 ; CHECK-LABEL: floor_v1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -31,10 +32,11 @@ define <2 x half> @floor_v2f16(<2 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -53,10 +55,11 @@ define <4 x half> @floor_v4f16(<4 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -75,10 +78,11 @@ define <8 x half> @floor_v8f16(<8 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +101,11 @@ define <16 x half> @floor_v16f16(<16 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -120,10 +125,11 @@ define <32 x half> @floor_v32f16(<32 x half> %x) strictfp {
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -248,88 +254,160 @@ define <16 x float> @floor_v16f32(<16 x float> %x) strictfp {
 declare <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float>, metadata)
 
 define <1 x double> @floor_v1f64(<1 x double> %x) strictfp {
-; CHECK-LABEL: floor_v1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: floor_v1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: floor_v1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double> %x, metadata !"fpexcept.strict")
   ret <1 x double> %a
 }
 declare <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double>, metadata)
 
 define <2 x double> @floor_v2f64(<2 x double> %x) strictfp {
-; CHECK-LABEL: floor_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: floor_v2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: floor_v2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double> %x, metadata !"fpexcept.strict")
   ret <2 x double> %a
 }
 declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata)
 
 define <4 x double> @floor_v4f64(<4 x double> %x) strictfp {
-; CHECK-LABEL: floor_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: floor_v4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: floor_v4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double> %x, metadata !"fpexcept.strict")
   ret <4 x double> %a
 }
 declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata)
 
 define <8 x double> @floor_v8f64(<8 x double> %x) strictfp {
-; CHECK-LABEL: floor_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: floor_v8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: floor_v8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double> %x, metadata !"fpexcept.strict")
   ret <8 x double> %a
 }
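
The updated RV64 checks above materialize the rounding thresholds in integer registers instead of loading them from a constant pool: for f16, "li a0, 25" followed by "slli a0, a0, 10" builds the bit pattern 0x6400 (1024.0 = 2^10, the magnitude above which every half-precision value is already an integer), and for f64, "li a0, 1075" followed by "slli a0, a0, 52" builds 0x4330000000000000 (2^52, the corresponding threshold for doubles). A minimal C sketch, not part of this patch and assuming an IEEE-754 host, that checks the double encoding:

  /* Standalone check of the constant built by "li a0, 1075; slli a0, a0, 52". */
  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  int main(void) {
    uint64_t bits = 1075ULL << 52;               /* 0x4330000000000000 */
    double threshold;
    memcpy(&threshold, &bits, sizeof threshold); /* reinterpret the bits as a double */
    /* 2^52 is the smallest magnitude at which every double is already an
       integer, so only |x| < threshold needs the convert-and-copysign sequence. */
    printf("0x%016llx -> %.1f (expected 4503599627370496.0)\n",
           (unsigned long long)bits, threshold);
    return 0;
  }

Built with any C99 compiler this prints 2^52, matching the constant the RV32 paths still load from the .LCPI*_0 pool entries.
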
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
index 84b3e142d5aea3d..af6c7ace696dea1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
@@ -1,22 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVHW-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVHW-RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64
 
 declare <2 x half> @llvm.vp.floor.v2f16(<2 x half>, <2 x i1>, i32)
 
 define <2 x half> @vp_floor_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_floor_v2f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 2
@@ -55,10 +56,11 @@ define <2 x half> @vp_floor_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl)
 define <2 x half> @vp_floor_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_floor_v2f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 2
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +99,11 @@ declare <4 x half> @llvm.vp.floor.v4f16(<4 x half>, <4 x i1>, i32)
 define <4 x half> @vp_floor_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_floor_v4f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 2
@@ -139,10 +142,11 @@ define <4 x half> @vp_floor_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl)
 define <4 x half> @vp_floor_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_floor_v4f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 2
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -181,10 +185,11 @@ declare <8 x half> @llvm.vp.floor.v8f16(<8 x half>, <8 x i1>, i32)
 define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_floor_v8f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 2
@@ -225,10 +230,11 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
 define <8 x half> @vp_floor_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_floor_v8f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 2
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -268,10 +274,11 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
 ; ZVFH-LABEL: vp_floor_v16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 2
@@ -313,10 +320,11 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
 define <16 x half> @vp_floor_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_floor_v16f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v10, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
 ; ZVFH-NEXT:    fsrmi a0, 2
 ; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -533,41 +541,141 @@ define <16 x float> @vp_floor_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl
 declare <2 x double> @llvm.vp.floor.v2f64(<2 x double>, <2 x i1>, i32)
 
 define <2 x double> @vp_floor_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI16_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVHW-RV32-LABEL: vp_floor_v2f64:
+; ZVHW-RV32:       # %bb.0:
+; ZVHW-RV32-NEXT:    lui a1, %hi(.LCPI16_0)
+; ZVHW-RV32-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
+; ZVHW-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVHW-RV32-NEXT:    vfabs.v v9, v8, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVHW-RV32-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVHW-RV32-NEXT:    fsrmi a0, 2
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVHW-RV32-NEXT:    fsrm a0
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVHW-RV32-NEXT:    ret
+;
+; ZVHW-RV64-LABEL: vp_floor_v2f64:
+; ZVHW-RV64:       # %bb.0:
+; ZVHW-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVHW-RV64-NEXT:    vfabs.v v9, v8, v0.t
+; ZVHW-RV64-NEXT:    li a0, 1075
+; ZVHW-RV64-NEXT:    slli a0, a0, 52
+; ZVHW-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVHW-RV64-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVHW-RV64-NEXT:    fsrmi a0, 2
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVHW-RV64-NEXT:    fsrm a0
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVHW-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_floor_v2f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI16_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_floor_v2f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
   ret <2 x double> %v
 }
 
 define <2 x double> @vp_floor_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_v2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI17_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVHW-RV32-LABEL: vp_floor_v2f64_unmasked:
+; ZVHW-RV32:       # %bb.0:
+; ZVHW-RV32-NEXT:    lui a1, %hi(.LCPI17_0)
+; ZVHW-RV32-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
+; ZVHW-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVHW-RV32-NEXT:    vfabs.v v9, v8
+; ZVHW-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVHW-RV32-NEXT:    fsrmi a0, 2
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVHW-RV32-NEXT:    fsrm a0
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVHW-RV32-NEXT:    ret
+;
+; ZVHW-RV64-LABEL: vp_floor_v2f64_unmasked:
+; ZVHW-RV64:       # %bb.0:
+; ZVHW-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVHW-RV64-NEXT:    vfabs.v v9, v8
+; ZVHW-RV64-NEXT:    li a0, 1075
+; ZVHW-RV64-NEXT:    slli a0, a0, 52
+; ZVHW-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVHW-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVHW-RV64-NEXT:    fsrmi a0, 2
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVHW-RV64-NEXT:    fsrm a0
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVHW-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_floor_v2f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI17_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_floor_v2f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
@@ -577,43 +685,149 @@ define <2 x double> @vp_floor_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl)
 declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32)
 
 define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVHW-RV32-LABEL: vp_floor_v4f64:
+; ZVHW-RV32:       # %bb.0:
+; ZVHW-RV32-NEXT:    vmv1r.v v10, v0
+; ZVHW-RV32-NEXT:    lui a1, %hi(.LCPI18_0)
+; ZVHW-RV32-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
+; ZVHW-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVHW-RV32-NEXT:    vfabs.v v12, v8, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVHW-RV32-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVHW-RV32-NEXT:    fsrmi a0, 2
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVHW-RV32-NEXT:    vmv1r.v v0, v10
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVHW-RV32-NEXT:    fsrm a0
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVHW-RV32-NEXT:    ret
+;
+; ZVHW-RV64-LABEL: vp_floor_v4f64:
+; ZVHW-RV64:       # %bb.0:
+; ZVHW-RV64-NEXT:    vmv1r.v v10, v0
+; ZVHW-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVHW-RV64-NEXT:    vfabs.v v12, v8, v0.t
+; ZVHW-RV64-NEXT:    li a0, 1075
+; ZVHW-RV64-NEXT:    slli a0, a0, 52
+; ZVHW-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVHW-RV64-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVHW-RV64-NEXT:    fsrmi a0, 2
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVHW-RV64-NEXT:    vmv1r.v v0, v10
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVHW-RV64-NEXT:    fsrm a0
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVHW-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_floor_v4f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI18_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_floor_v4f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
 }
 
 define <4 x double> @vp_floor_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_v4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI19_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVHW-RV32-LABEL: vp_floor_v4f64_unmasked:
+; ZVHW-RV32:       # %bb.0:
+; ZVHW-RV32-NEXT:    lui a1, %hi(.LCPI19_0)
+; ZVHW-RV32-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
+; ZVHW-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVHW-RV32-NEXT:    vfabs.v v10, v8
+; ZVHW-RV32-NEXT:    vmflt.vf v0, v10, fa5
+; ZVHW-RV32-NEXT:    fsrmi a0, 2
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVHW-RV32-NEXT:    fsrm a0
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVHW-RV32-NEXT:    ret
+;
+; ZVHW-RV64-LABEL: vp_floor_v4f64_unmasked:
+; ZVHW-RV64:       # %bb.0:
+; ZVHW-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVHW-RV64-NEXT:    vfabs.v v10, v8
+; ZVHW-RV64-NEXT:    li a0, 1075
+; ZVHW-RV64-NEXT:    slli a0, a0, 52
+; ZVHW-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVHW-RV64-NEXT:    vmflt.vf v0, v10, fa5
+; ZVHW-RV64-NEXT:    fsrmi a0, 2
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVHW-RV64-NEXT:    fsrm a0
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVHW-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_floor_v4f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI19_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v10, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_floor_v4f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v10, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
@@ -623,43 +837,149 @@ define <4 x double> @vp_floor_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl)
 declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32)
 
 define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVHW-RV32-LABEL: vp_floor_v8f64:
+; ZVHW-RV32:       # %bb.0:
+; ZVHW-RV32-NEXT:    vmv1r.v v12, v0
+; ZVHW-RV32-NEXT:    lui a1, %hi(.LCPI20_0)
+; ZVHW-RV32-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
+; ZVHW-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVHW-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVHW-RV32-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVHW-RV32-NEXT:    fsrmi a0, 2
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVHW-RV32-NEXT:    vmv1r.v v0, v12
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVHW-RV32-NEXT:    fsrm a0
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVHW-RV32-NEXT:    ret
+;
+; ZVHW-RV64-LABEL: vp_floor_v8f64:
+; ZVHW-RV64:       # %bb.0:
+; ZVHW-RV64-NEXT:    vmv1r.v v12, v0
+; ZVHW-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVHW-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVHW-RV64-NEXT:    li a0, 1075
+; ZVHW-RV64-NEXT:    slli a0, a0, 52
+; ZVHW-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVHW-RV64-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVHW-RV64-NEXT:    fsrmi a0, 2
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVHW-RV64-NEXT:    vmv1r.v v0, v12
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVHW-RV64-NEXT:    fsrm a0
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVHW-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_floor_v8f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v12, v0
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI20_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_floor_v8f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v12, v0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
 }
 
 define <8 x double> @vp_floor_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_v8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI21_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVHW-RV32-LABEL: vp_floor_v8f64_unmasked:
+; ZVHW-RV32:       # %bb.0:
+; ZVHW-RV32-NEXT:    lui a1, %hi(.LCPI21_0)
+; ZVHW-RV32-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
+; ZVHW-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVHW-RV32-NEXT:    vfabs.v v12, v8
+; ZVHW-RV32-NEXT:    vmflt.vf v0, v12, fa5
+; ZVHW-RV32-NEXT:    fsrmi a0, 2
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVHW-RV32-NEXT:    fsrm a0
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVHW-RV32-NEXT:    ret
+;
+; ZVHW-RV64-LABEL: vp_floor_v8f64_unmasked:
+; ZVHW-RV64:       # %bb.0:
+; ZVHW-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVHW-RV64-NEXT:    vfabs.v v12, v8
+; ZVHW-RV64-NEXT:    li a0, 1075
+; ZVHW-RV64-NEXT:    slli a0, a0, 52
+; ZVHW-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVHW-RV64-NEXT:    vmflt.vf v0, v12, fa5
+; ZVHW-RV64-NEXT:    fsrmi a0, 2
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVHW-RV64-NEXT:    fsrm a0
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVHW-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_floor_v8f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI21_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v12, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_floor_v8f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v12, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
@@ -669,43 +989,149 @@ define <8 x double> @vp_floor_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl)
 declare <15 x double> @llvm.vp.floor.v15f64(<15 x double>, <15 x i1>, i32)
 
 define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_v15f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVHW-RV32-LABEL: vp_floor_v15f64:
+; ZVHW-RV32:       # %bb.0:
+; ZVHW-RV32-NEXT:    vmv1r.v v16, v0
+; ZVHW-RV32-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVHW-RV32-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
+; ZVHW-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVHW-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVHW-RV32-NEXT:    fsrmi a0, 2
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVHW-RV32-NEXT:    vmv1r.v v0, v16
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVHW-RV32-NEXT:    fsrm a0
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVHW-RV32-NEXT:    ret
+;
+; ZVHW-RV64-LABEL: vp_floor_v15f64:
+; ZVHW-RV64:       # %bb.0:
+; ZVHW-RV64-NEXT:    vmv1r.v v16, v0
+; ZVHW-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVHW-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVHW-RV64-NEXT:    li a0, 1075
+; ZVHW-RV64-NEXT:    slli a0, a0, 52
+; ZVHW-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVHW-RV64-NEXT:    fsrmi a0, 2
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVHW-RV64-NEXT:    vmv1r.v v0, v16
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVHW-RV64-NEXT:    fsrm a0
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVHW-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_floor_v15f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v16, v0
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_floor_v15f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v16, v0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
 }
 
 define <15 x double> @vp_floor_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_v15f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVHW-RV32-LABEL: vp_floor_v15f64_unmasked:
+; ZVHW-RV32:       # %bb.0:
+; ZVHW-RV32-NEXT:    lui a1, %hi(.LCPI23_0)
+; ZVHW-RV32-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
+; ZVHW-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVHW-RV32-NEXT:    vfabs.v v16, v8
+; ZVHW-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVHW-RV32-NEXT:    fsrmi a0, 2
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVHW-RV32-NEXT:    fsrm a0
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVHW-RV32-NEXT:    ret
+;
+; ZVHW-RV64-LABEL: vp_floor_v15f64_unmasked:
+; ZVHW-RV64:       # %bb.0:
+; ZVHW-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVHW-RV64-NEXT:    vfabs.v v16, v8
+; ZVHW-RV64-NEXT:    li a0, 1075
+; ZVHW-RV64-NEXT:    slli a0, a0, 52
+; ZVHW-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVHW-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVHW-RV64-NEXT:    fsrmi a0, 2
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVHW-RV64-NEXT:    fsrm a0
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVHW-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_floor_v15f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI23_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v16, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_floor_v15f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v16, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <15 x i1> poison, i1 true, i32 0
   %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
   %v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
@@ -715,43 +1141,149 @@ define <15 x double> @vp_floor_v15f64_unmasked(<15 x double> %va, i32 zeroext %e
 declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32)
 
 define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_v16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVHW-RV32-LABEL: vp_floor_v16f64:
+; ZVHW-RV32:       # %bb.0:
+; ZVHW-RV32-NEXT:    vmv1r.v v16, v0
+; ZVHW-RV32-NEXT:    lui a1, %hi(.LCPI24_0)
+; ZVHW-RV32-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
+; ZVHW-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVHW-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVHW-RV32-NEXT:    fsrmi a0, 2
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVHW-RV32-NEXT:    vmv1r.v v0, v16
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVHW-RV32-NEXT:    fsrm a0
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVHW-RV32-NEXT:    ret
+;
+; ZVHW-RV64-LABEL: vp_floor_v16f64:
+; ZVHW-RV64:       # %bb.0:
+; ZVHW-RV64-NEXT:    vmv1r.v v16, v0
+; ZVHW-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVHW-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVHW-RV64-NEXT:    li a0, 1075
+; ZVHW-RV64-NEXT:    slli a0, a0, 52
+; ZVHW-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVHW-RV64-NEXT:    fsrmi a0, 2
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVHW-RV64-NEXT:    vmv1r.v v0, v16
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVHW-RV64-NEXT:    fsrm a0
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVHW-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_floor_v16f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v16, v0
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI24_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_floor_v16f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v16, v0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
 }
 
 define <16 x double> @vp_floor_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_v16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVHW-RV32-LABEL: vp_floor_v16f64_unmasked:
+; ZVHW-RV32:       # %bb.0:
+; ZVHW-RV32-NEXT:    lui a1, %hi(.LCPI25_0)
+; ZVHW-RV32-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
+; ZVHW-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVHW-RV32-NEXT:    vfabs.v v16, v8
+; ZVHW-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVHW-RV32-NEXT:    fsrmi a0, 2
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVHW-RV32-NEXT:    fsrm a0
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVHW-RV32-NEXT:    ret
+;
+; ZVHW-RV64-LABEL: vp_floor_v16f64_unmasked:
+; ZVHW-RV64:       # %bb.0:
+; ZVHW-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVHW-RV64-NEXT:    vfabs.v v16, v8
+; ZVHW-RV64-NEXT:    li a0, 1075
+; ZVHW-RV64-NEXT:    slli a0, a0, 52
+; ZVHW-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVHW-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVHW-RV64-NEXT:    fsrmi a0, 2
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVHW-RV64-NEXT:    fsrm a0
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVHW-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_floor_v16f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI25_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v16, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_floor_v16f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v16, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
@@ -761,112 +1293,425 @@ define <16 x double> @vp_floor_v16f64_unmasked(<16 x double> %va, i32 zeroext %e
 declare <32 x double> @llvm.vp.floor.v32f64(<32 x double>, <32 x i1>, i32)
 
 define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_v32f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    vslidedown.vi v24, v0, 2
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB26_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a1, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v8, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; ZVHW-RV32-LABEL: vp_floor_v32f64:
+; ZVHW-RV32:       # %bb.0:
+; ZVHW-RV32-NEXT:    addi sp, sp, -16
+; ZVHW-RV32-NEXT:    .cfi_def_cfa_offset 16
+; ZVHW-RV32-NEXT:    csrr a1, vlenb
+; ZVHW-RV32-NEXT:    slli a1, a1, 4
+; ZVHW-RV32-NEXT:    sub sp, sp, a1
+; ZVHW-RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVHW-RV32-NEXT:    vmv1r.v v25, v0
+; ZVHW-RV32-NEXT:    csrr a1, vlenb
+; ZVHW-RV32-NEXT:    slli a1, a1, 3
+; ZVHW-RV32-NEXT:    add a1, sp, a1
+; ZVHW-RV32-NEXT:    addi a1, a1, 16
+; ZVHW-RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVHW-RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; ZVHW-RV32-NEXT:    li a2, 16
+; ZVHW-RV32-NEXT:    vslidedown.vi v24, v0, 2
+; ZVHW-RV32-NEXT:    mv a1, a0
+; ZVHW-RV32-NEXT:    bltu a0, a2, .LBB26_2
+; ZVHW-RV32-NEXT:  # %bb.1:
+; ZVHW-RV32-NEXT:    li a1, 16
+; ZVHW-RV32-NEXT:  .LBB26_2:
+; ZVHW-RV32-NEXT:    lui a2, %hi(.LCPI26_0)
+; ZVHW-RV32-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
+; ZVHW-RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVHW-RV32-NEXT:    vmv1r.v v0, v25
+; ZVHW-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV32-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; ZVHW-RV32-NEXT:    fsrmi a1, 2
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVHW-RV32-NEXT:    vmv1r.v v0, v25
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVHW-RV32-NEXT:    fsrm a1
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVHW-RV32-NEXT:    addi a1, sp, 16
+; ZVHW-RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVHW-RV32-NEXT:    addi a1, a0, -16
+; ZVHW-RV32-NEXT:    sltu a0, a0, a1
+; ZVHW-RV32-NEXT:    addi a0, a0, -1
+; ZVHW-RV32-NEXT:    and a0, a0, a1
+; ZVHW-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVHW-RV32-NEXT:    vmv1r.v v0, v24
+; ZVHW-RV32-NEXT:    csrr a0, vlenb
+; ZVHW-RV32-NEXT:    slli a0, a0, 3
+; ZVHW-RV32-NEXT:    add a0, sp, a0
+; ZVHW-RV32-NEXT:    addi a0, a0, 16
+; ZVHW-RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVHW-RV32-NEXT:    vfabs.v v8, v16, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV32-NEXT:    vmflt.vf v24, v8, fa5, v0.t
+; ZVHW-RV32-NEXT:    fsrmi a0, 2
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVHW-RV32-NEXT:    vmv1r.v v0, v24
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVHW-RV32-NEXT:    fsrm a0
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVHW-RV32-NEXT:    addi a0, sp, 16
+; ZVHW-RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVHW-RV32-NEXT:    csrr a0, vlenb
+; ZVHW-RV32-NEXT:    slli a0, a0, 4
+; ZVHW-RV32-NEXT:    add sp, sp, a0
+; ZVHW-RV32-NEXT:    addi sp, sp, 16
+; ZVHW-RV32-NEXT:    ret
+;
+; ZVHW-RV64-LABEL: vp_floor_v32f64:
+; ZVHW-RV64:       # %bb.0:
+; ZVHW-RV64-NEXT:    addi sp, sp, -16
+; ZVHW-RV64-NEXT:    .cfi_def_cfa_offset 16
+; ZVHW-RV64-NEXT:    csrr a1, vlenb
+; ZVHW-RV64-NEXT:    slli a1, a1, 4
+; ZVHW-RV64-NEXT:    sub sp, sp, a1
+; ZVHW-RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVHW-RV64-NEXT:    vmv1r.v v25, v0
+; ZVHW-RV64-NEXT:    csrr a1, vlenb
+; ZVHW-RV64-NEXT:    slli a1, a1, 3
+; ZVHW-RV64-NEXT:    add a1, sp, a1
+; ZVHW-RV64-NEXT:    addi a1, a1, 16
+; ZVHW-RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVHW-RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; ZVHW-RV64-NEXT:    li a2, 16
+; ZVHW-RV64-NEXT:    vslidedown.vi v24, v0, 2
+; ZVHW-RV64-NEXT:    mv a1, a0
+; ZVHW-RV64-NEXT:    bltu a0, a2, .LBB26_2
+; ZVHW-RV64-NEXT:  # %bb.1:
+; ZVHW-RV64-NEXT:    li a1, 16
+; ZVHW-RV64-NEXT:  .LBB26_2:
+; ZVHW-RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVHW-RV64-NEXT:    vmv1r.v v0, v25
+; ZVHW-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVHW-RV64-NEXT:    li a1, 1075
+; ZVHW-RV64-NEXT:    slli a1, a1, 52
+; ZVHW-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV64-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; ZVHW-RV64-NEXT:    fsrmi a1, 2
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVHW-RV64-NEXT:    vmv1r.v v0, v25
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVHW-RV64-NEXT:    fsrm a1
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVHW-RV64-NEXT:    addi a1, sp, 16
+; ZVHW-RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVHW-RV64-NEXT:    addi a1, a0, -16
+; ZVHW-RV64-NEXT:    sltu a0, a0, a1
+; ZVHW-RV64-NEXT:    addi a0, a0, -1
+; ZVHW-RV64-NEXT:    and a0, a0, a1
+; ZVHW-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVHW-RV64-NEXT:    vmv1r.v v0, v24
+; ZVHW-RV64-NEXT:    csrr a0, vlenb
+; ZVHW-RV64-NEXT:    slli a0, a0, 3
+; ZVHW-RV64-NEXT:    add a0, sp, a0
+; ZVHW-RV64-NEXT:    addi a0, a0, 16
+; ZVHW-RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVHW-RV64-NEXT:    vfabs.v v8, v16, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV64-NEXT:    vmflt.vf v24, v8, fa5, v0.t
+; ZVHW-RV64-NEXT:    fsrmi a0, 2
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVHW-RV64-NEXT:    vmv1r.v v0, v24
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVHW-RV64-NEXT:    fsrm a0
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVHW-RV64-NEXT:    addi a0, sp, 16
+; ZVHW-RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVHW-RV64-NEXT:    csrr a0, vlenb
+; ZVHW-RV64-NEXT:    slli a0, a0, 4
+; ZVHW-RV64-NEXT:    add sp, sp, a0
+; ZVHW-RV64-NEXT:    addi sp, sp, 16
+; ZVHW-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_floor_v32f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    addi sp, sp, -16
+; ZVFHMIN-RV32-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-RV32-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV32-NEXT:    slli a1, a1, 4
+; ZVFHMIN-RV32-NEXT:    sub sp, sp, a1
+; ZVFHMIN-RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v25, v0
+; ZVFHMIN-RV32-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV32-NEXT:    slli a1, a1, 3
+; ZVFHMIN-RV32-NEXT:    add a1, sp, a1
+; ZVFHMIN-RV32-NEXT:    addi a1, a1, 16
+; ZVFHMIN-RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    li a2, 16
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v24, v0, 2
+; ZVFHMIN-RV32-NEXT:    mv a1, a0
+; ZVFHMIN-RV32-NEXT:    bltu a0, a2, .LBB26_2
+; ZVFHMIN-RV32-NEXT:  # %bb.1:
+; ZVFHMIN-RV32-NEXT:    li a1, 16
+; ZVFHMIN-RV32-NEXT:  .LBB26_2:
+; ZVFHMIN-RV32-NEXT:    lui a2, %hi(.LCPI26_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v25
+; ZVFHMIN-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a1, 2
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v25
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a1
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, -16
+; ZVFHMIN-RV32-NEXT:    sltu a0, a0, a1
+; ZVFHMIN-RV32-NEXT:    addi a0, a0, -1
+; ZVFHMIN-RV32-NEXT:    and a0, a0, a1
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v24
+; ZVFHMIN-RV32-NEXT:    csrr a0, vlenb
+; ZVFHMIN-RV32-NEXT:    slli a0, a0, 3
+; ZVFHMIN-RV32-NEXT:    add a0, sp, a0
+; ZVFHMIN-RV32-NEXT:    addi a0, a0, 16
+; ZVFHMIN-RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-RV32-NEXT:    vfabs.v v8, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v24, v8, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v24
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    addi a0, sp, 16
+; ZVFHMIN-RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-RV32-NEXT:    csrr a0, vlenb
+; ZVFHMIN-RV32-NEXT:    slli a0, a0, 4
+; ZVFHMIN-RV32-NEXT:    add sp, sp, a0
+; ZVFHMIN-RV32-NEXT:    addi sp, sp, 16
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_floor_v32f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    addi sp, sp, -16
+; ZVFHMIN-RV64-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-RV64-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV64-NEXT:    slli a1, a1, 4
+; ZVFHMIN-RV64-NEXT:    sub sp, sp, a1
+; ZVFHMIN-RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v25, v0
+; ZVFHMIN-RV64-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV64-NEXT:    slli a1, a1, 3
+; ZVFHMIN-RV64-NEXT:    add a1, sp, a1
+; ZVFHMIN-RV64-NEXT:    addi a1, a1, 16
+; ZVFHMIN-RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; ZVFHMIN-RV64-NEXT:    li a2, 16
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v24, v0, 2
+; ZVFHMIN-RV64-NEXT:    mv a1, a0
+; ZVFHMIN-RV64-NEXT:    bltu a0, a2, .LBB26_2
+; ZVFHMIN-RV64-NEXT:  # %bb.1:
+; ZVFHMIN-RV64-NEXT:    li a1, 16
+; ZVFHMIN-RV64-NEXT:  .LBB26_2:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v25
+; ZVFHMIN-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a1, 1075
+; ZVFHMIN-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a1, 2
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v25
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a1
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-RV64-NEXT:    addi a1, a0, -16
+; ZVFHMIN-RV64-NEXT:    sltu a0, a0, a1
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, -1
+; ZVFHMIN-RV64-NEXT:    and a0, a0, a1
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v24
+; ZVFHMIN-RV64-NEXT:    csrr a0, vlenb
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 3
+; ZVFHMIN-RV64-NEXT:    add a0, sp, a0
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 16
+; ZVFHMIN-RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-RV64-NEXT:    vfabs.v v8, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v24, v8, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v24
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    addi a0, sp, 16
+; ZVFHMIN-RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-RV64-NEXT:    csrr a0, vlenb
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 4
+; ZVFHMIN-RV64-NEXT:    add sp, sp, a0
+; ZVFHMIN-RV64-NEXT:    addi sp, sp, 16
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
   ret <32 x double> %v
 }
 
 define <32 x double> @vp_floor_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_v32f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB27_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB27_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a1, 2
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    ret
+; ZVHW-RV32-LABEL: vp_floor_v32f64_unmasked:
+; ZVHW-RV32:       # %bb.0:
+; ZVHW-RV32-NEXT:    li a2, 16
+; ZVHW-RV32-NEXT:    mv a1, a0
+; ZVHW-RV32-NEXT:    bltu a0, a2, .LBB27_2
+; ZVHW-RV32-NEXT:  # %bb.1:
+; ZVHW-RV32-NEXT:    li a1, 16
+; ZVHW-RV32-NEXT:  .LBB27_2:
+; ZVHW-RV32-NEXT:    lui a2, %hi(.LCPI27_0)
+; ZVHW-RV32-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
+; ZVHW-RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVHW-RV32-NEXT:    vfabs.v v24, v8
+; ZVHW-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVHW-RV32-NEXT:    fsrmi a1, 2
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVHW-RV32-NEXT:    fsrm a1
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVHW-RV32-NEXT:    addi a1, a0, -16
+; ZVHW-RV32-NEXT:    sltu a0, a0, a1
+; ZVHW-RV32-NEXT:    addi a0, a0, -1
+; ZVHW-RV32-NEXT:    and a0, a0, a1
+; ZVHW-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVHW-RV32-NEXT:    vfabs.v v24, v16
+; ZVHW-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVHW-RV32-NEXT:    fsrmi a0, 2
+; ZVHW-RV32-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVHW-RV32-NEXT:    fsrm a0
+; ZVHW-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVHW-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV32-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVHW-RV32-NEXT:    ret
+;
+; ZVHW-RV64-LABEL: vp_floor_v32f64_unmasked:
+; ZVHW-RV64:       # %bb.0:
+; ZVHW-RV64-NEXT:    li a2, 16
+; ZVHW-RV64-NEXT:    mv a1, a0
+; ZVHW-RV64-NEXT:    bltu a0, a2, .LBB27_2
+; ZVHW-RV64-NEXT:  # %bb.1:
+; ZVHW-RV64-NEXT:    li a1, 16
+; ZVHW-RV64-NEXT:  .LBB27_2:
+; ZVHW-RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVHW-RV64-NEXT:    vfabs.v v24, v8
+; ZVHW-RV64-NEXT:    li a1, 1075
+; ZVHW-RV64-NEXT:    slli a1, a1, 52
+; ZVHW-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVHW-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVHW-RV64-NEXT:    fsrmi a1, 2
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVHW-RV64-NEXT:    fsrm a1
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVHW-RV64-NEXT:    addi a1, a0, -16
+; ZVHW-RV64-NEXT:    sltu a0, a0, a1
+; ZVHW-RV64-NEXT:    addi a0, a0, -1
+; ZVHW-RV64-NEXT:    and a0, a0, a1
+; ZVHW-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVHW-RV64-NEXT:    vfabs.v v24, v16
+; ZVHW-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVHW-RV64-NEXT:    fsrmi a0, 2
+; ZVHW-RV64-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVHW-RV64-NEXT:    fsrm a0
+; ZVHW-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVHW-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVHW-RV64-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVHW-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_floor_v32f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    li a2, 16
+; ZVFHMIN-RV32-NEXT:    mv a1, a0
+; ZVFHMIN-RV32-NEXT:    bltu a0, a2, .LBB27_2
+; ZVFHMIN-RV32-NEXT:  # %bb.1:
+; ZVFHMIN-RV32-NEXT:    li a1, 16
+; ZVFHMIN-RV32-NEXT:  .LBB27_2:
+; ZVFHMIN-RV32-NEXT:    lui a2, %hi(.LCPI27_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v24, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a1, 2
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a1
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, -16
+; ZVFHMIN-RV32-NEXT:    sltu a0, a0, a1
+; ZVFHMIN-RV32-NEXT:    addi a0, a0, -1
+; ZVFHMIN-RV32-NEXT:    and a0, a0, a1
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v24, v16
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_floor_v32f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    li a2, 16
+; ZVFHMIN-RV64-NEXT:    mv a1, a0
+; ZVFHMIN-RV64-NEXT:    bltu a0, a2, .LBB27_2
+; ZVFHMIN-RV64-NEXT:  # %bb.1:
+; ZVFHMIN-RV64-NEXT:    li a1, 16
+; ZVFHMIN-RV64-NEXT:  .LBB27_2:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v24, v8
+; ZVFHMIN-RV64-NEXT:    li a1, 1075
+; ZVFHMIN-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a1, 2
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a1
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    addi a1, a0, -16
+; ZVFHMIN-RV64-NEXT:    sltu a0, a0, a1
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, -1
+; ZVFHMIN-RV64-NEXT:    and a0, a0, a1
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v24, v16
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 2
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <32 x i1> poison, i1 true, i32 0
   %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
   %v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
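(Note on the RV64 check lines above, outside the patch proper: the rounding threshold is now materialized in a GPR instead of being loaded from a .LCPI constant pool entry. The sequence "li a1, 1075; slli a1, a1, 52; fmv.d.x fa5, a1" builds the IEEE-754 bit pattern of 2^52, the smallest positive double whose ulp is 1.0, which is the usual |x| cutoff for these round-to-integer lowerings; the half tests use "li 25; slli 10; fmv.h.x" for the analogous 2^10 threshold. A minimal, standalone C++ sketch of that bit-level arithmetic, purely illustrative and not part of the patch:

  #include <cstdint>
  #include <cstring>
  #include <cstdio>
  #include <cmath>

  int main() {
    // 1075 << 52 places the biased exponent of 2^52 (1023 + 52 = 1075)
    // in the exponent field of a double with a zero mantissa, giving
    // exactly 2^52 = 4503599627370496.0 -- the vmflt.vf threshold.
    uint64_t dbits = 1075ull << 52;
    double d;
    std::memcpy(&d, &dbits, sizeof(d));
    std::printf("%.1f\n", d); // 4503599627370496.0

    // 25 << 10 is the IEEE-754 half pattern of 2^10 = 1024.0
    // (half exponent bias is 15, so 15 + 10 = 25); decode it here
    // from the exponent field since C++ has no native half type.
    uint16_t hbits = 25u << 10;
    int hexp = (hbits >> 10) & 0x1f;
    std::printf("%.1f\n", std::ldexp(1.0, hexp - 15)); // 1024.0
    return 0;
  }
)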
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll
index 1b50214bbf164da..b8511676d689195 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll
@@ -11,10 +11,11 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -34,10 +35,11 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -57,10 +59,11 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -80,10 +83,11 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -104,10 +108,11 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp {
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -215,22 +220,6 @@ define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp {
 declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
 
 define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp {
-; CHECK-LABEL: nearbyint_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI9_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %r = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x double> %r
 }
@@ -238,22 +227,6 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp {
 declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
 
 define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp {
-; CHECK-LABEL: nearbyint_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI10_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %r = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x double> %r
 }
@@ -261,22 +234,6 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp {
 declare <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double>, metadata, metadata)
 
 define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp {
-; CHECK-LABEL: nearbyint_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %r = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x double> %r
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
index 59eb4b89a2f56e8..70a29366a03f816 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -27,29 +27,49 @@ define <8 x float> @shuffle_v8f32(<8 x float> %x, <8 x float> %y) {
 }
 
 define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) {
-; CHECK-LABEL: shuffle_fv_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI2_0)(a0)
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 9
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa5, v0
-; CHECK-NEXT:    ret
+; RV32-LABEL: shuffle_fv_v4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI2_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI2_0)(a0)
+; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; RV32-NEXT:    vmv.v.i v0, 9
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vfmerge.vfm v8, v8, fa5, v0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: shuffle_fv_v4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT:    vmv.v.i v0, 9
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64-NEXT:    ret
   %s = shufflevector <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x double> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
   ret <4 x double> %s
 }
 
 define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
-; CHECK-LABEL: shuffle_vf_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI3_0)(a0)
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 6
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa5, v0
-; CHECK-NEXT:    ret
+; RV32-LABEL: shuffle_vf_v4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI3_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI3_0)(a0)
+; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; RV32-NEXT:    vmv.v.i v0, 6
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vfmerge.vfm v8, v8, fa5, v0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: shuffle_vf_v4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT:    vmv.v.i v0, 6
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64-NEXT:    ret
   %s = shufflevector <4 x double> %x, <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
   ret <4 x double> %s
 }
@@ -107,19 +127,35 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y)
 }
 
 define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
-; CHECK-LABEL: vrgather_shuffle_xv_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vid.v v12
-; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI7_0)
-; CHECK-NEXT:    vlse64.v v10, (a0), zero
-; CHECK-NEXT:    vrsub.vi v12, v12, 4
-; CHECK-NEXT:    vmv.v.i v0, 12
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
+; RV32-LABEL: vrgather_shuffle_xv_v4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT:    vid.v v12
+; RV32-NEXT:    lui a0, %hi(.LCPI7_0)
+; RV32-NEXT:    addi a0, a0, %lo(.LCPI7_0)
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vrsub.vi v12, v12, 4
+; RV32-NEXT:    vmv.v.i v0, 12
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vrgatherei16.vv v10, v8, v12, v0.t
+; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vrgather_shuffle_xv_v4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT:    vid.v v10
+; RV64-NEXT:    vrsub.vi v12, v10, 4
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT:    vmv.v.i v0, 12
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-NEXT:    vrgatherei16.vv v10, v8, v12, v0.t
+; RV64-NEXT:    vmv.v.v v8, v10
+; RV64-NEXT:    ret
   %s = shufflevector <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x double> %x, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
   ret <4 x double> %s
 }
@@ -143,14 +179,16 @@ define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
 ; RV64-LABEL: vrgather_shuffle_vx_v4f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT:    vid.v v12
-; RV64-NEXT:    lui a0, %hi(.LCPI8_0)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI8_0)
-; RV64-NEXT:    vlse64.v v10, (a0), zero
+; RV64-NEXT:    vid.v v10
 ; RV64-NEXT:    li a0, 3
-; RV64-NEXT:    vmul.vx v12, v12, a0
+; RV64-NEXT:    vmul.vx v12, v10, a0
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    slli a0, a0, 62
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
 ; RV64-NEXT:    vmv.v.i v0, 3
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; RV64-NEXT:    vrgatherei16.vv v10, v8, v12, v0.t
 ; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 6933ffa7bd9c054..1cee3582192be6a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -5393,9 +5393,10 @@ define void @trunc_v8f16(ptr %x) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI115_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI115_0)(a1)
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a1, 25
+; ZVFH-NEXT:    slli a1, a1, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a1
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -5415,10 +5416,11 @@ define void @trunc_v6f16(ptr %x) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI116_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI116_0)(a1)
 ; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a1, 25
+; ZVFH-NEXT:    slli a1, a1, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a1
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -5472,20 +5474,98 @@ define void @trunc_v4f32(ptr %x) {
 declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
 
 define void @trunc_v2f64(ptr %x) {
-; CHECK-LABEL: trunc_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    lui a1, %hi(.LCPI118_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI118_0)(a1)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
+; LMULMAX1-RV32-LABEL: trunc_v2f64:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI118_0)
+; LMULMAX1-RV32-NEXT:    fld fa5, %lo(.LCPI118_0)(a1)
+; LMULMAX1-RV32-NEXT:    vfabs.v v9, v8
+; LMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; LMULMAX1-RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
+; LMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; LMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: trunc_v2f64:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    vfabs.v v9, v8
+; LMULMAX1-RV64-NEXT:    li a1, 1075
+; LMULMAX1-RV64-NEXT:    slli a1, a1, 52
+; LMULMAX1-RV64-NEXT:    fmv.d.x fa5, a1
+; LMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; LMULMAX1-RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
+; LMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; LMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    ret
+;
+; ZVFHMINLMULMAX2-RV32-LABEL: trunc_v2f64:
+; ZVFHMINLMULMAX2-RV32:       # %bb.0:
+; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    lui a1, %hi(.LCPI118_0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    fld fa5, %lo(.LCPI118_0)(a1)
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    ret
+;
+; ZVFHMINLMULMAX2-RV64-LABEL: trunc_v2f64:
+; ZVFHMINLMULMAX2-RV64:       # %bb.0:
+; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT:    li a1, 1075
+; ZVFHMINLMULMAX2-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMINLMULMAX2-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMINLMULMAX2-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT:    ret
+;
+; ZVFHMINLMULMAX1-RV32-LABEL: trunc_v2f64:
+; ZVFHMINLMULMAX1-RV32:       # %bb.0:
+; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI118_0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    fld fa5, %lo(.LCPI118_0)(a1)
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    ret
+;
+; ZVFHMINLMULMAX1-RV64-LABEL: trunc_v2f64:
+; ZVFHMINLMULMAX1-RV64:       # %bb.0:
+; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT:    li a1, 1075
+; ZVFHMINLMULMAX1-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMINLMULMAX1-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMINLMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT:    ret
   %a = load <2 x double>, ptr %x
   %b = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a)
   store <2 x double> %b, ptr %x
@@ -5498,9 +5578,10 @@ define void @ceil_v8f16(ptr %x) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI119_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI119_0)(a1)
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a1, 25
+; ZVFH-NEXT:    slli a1, a1, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a1
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a1, 3
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -5543,10 +5624,11 @@ define void @ceil_v6f16(ptr %x) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI120_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI120_0)(a1)
 ; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a1, 25
+; ZVFH-NEXT:    slli a1, a1, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a1
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a1, 3
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -5708,22 +5790,110 @@ define void @ceil_v4f32(ptr %x) {
 declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
 
 define void @ceil_v2f64(ptr %x) {
-; CHECK-LABEL: ceil_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    lui a1, %hi(.LCPI122_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI122_0)(a1)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a1, 3
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
+; LMULMAX1-RV32-LABEL: ceil_v2f64:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI122_0)
+; LMULMAX1-RV32-NEXT:    fld fa5, %lo(.LCPI122_0)(a1)
+; LMULMAX1-RV32-NEXT:    vfabs.v v9, v8
+; LMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; LMULMAX1-RV32-NEXT:    fsrmi a1, 3
+; LMULMAX1-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; LMULMAX1-RV32-NEXT:    fsrm a1
+; LMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; LMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: ceil_v2f64:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    vfabs.v v9, v8
+; LMULMAX1-RV64-NEXT:    li a1, 1075
+; LMULMAX1-RV64-NEXT:    slli a1, a1, 52
+; LMULMAX1-RV64-NEXT:    fmv.d.x fa5, a1
+; LMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; LMULMAX1-RV64-NEXT:    fsrmi a1, 3
+; LMULMAX1-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; LMULMAX1-RV64-NEXT:    fsrm a1
+; LMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; LMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    ret
+;
+; ZVFHMINLMULMAX2-RV32-LABEL: ceil_v2f64:
+; ZVFHMINLMULMAX2-RV32:       # %bb.0:
+; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    lui a1, %hi(.LCPI122_0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    fld fa5, %lo(.LCPI122_0)(a1)
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX2-RV32-NEXT:    fsrmi a1, 3
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    fsrm a1
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    ret
+;
+; ZVFHMINLMULMAX2-RV64-LABEL: ceil_v2f64:
+; ZVFHMINLMULMAX2-RV64:       # %bb.0:
+; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT:    li a1, 1075
+; ZVFHMINLMULMAX2-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMINLMULMAX2-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMINLMULMAX2-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX2-RV64-NEXT:    fsrmi a1, 3
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    fsrm a1
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT:    ret
+;
+; ZVFHMINLMULMAX1-RV32-LABEL: ceil_v2f64:
+; ZVFHMINLMULMAX1-RV32:       # %bb.0:
+; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI122_0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    fld fa5, %lo(.LCPI122_0)(a1)
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX1-RV32-NEXT:    fsrmi a1, 3
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    fsrm a1
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    ret
+;
+; ZVFHMINLMULMAX1-RV64-LABEL: ceil_v2f64:
+; ZVFHMINLMULMAX1-RV64:       # %bb.0:
+; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT:    li a1, 1075
+; ZVFHMINLMULMAX1-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMINLMULMAX1-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMINLMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX1-RV64-NEXT:    fsrmi a1, 3
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    fsrm a1
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT:    ret
   %a = load <2 x double>, ptr %x
   %b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
   store <2 x double> %b, ptr %x
@@ -5736,9 +5906,10 @@ define void @floor_v8f16(ptr %x) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI123_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI123_0)(a1)
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a1, 25
+; ZVFH-NEXT:    slli a1, a1, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a1
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a1, 2
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -5781,10 +5952,11 @@ define void @floor_v6f16(ptr %x) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI124_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI124_0)(a1)
 ; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a1, 25
+; ZVFH-NEXT:    slli a1, a1, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a1
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a1, 2
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -5946,22 +6118,110 @@ define void @floor_v4f32(ptr %x) {
 declare <4 x float> @llvm.floor.v4f32(<4 x float>)
 
 define void @floor_v2f64(ptr %x) {
-; CHECK-LABEL: floor_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    lui a1, %hi(.LCPI126_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI126_0)(a1)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a1, 2
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
+; LMULMAX1-RV32-LABEL: floor_v2f64:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI126_0)
+; LMULMAX1-RV32-NEXT:    fld fa5, %lo(.LCPI126_0)(a1)
+; LMULMAX1-RV32-NEXT:    vfabs.v v9, v8
+; LMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; LMULMAX1-RV32-NEXT:    fsrmi a1, 2
+; LMULMAX1-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; LMULMAX1-RV32-NEXT:    fsrm a1
+; LMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; LMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: floor_v2f64:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    vfabs.v v9, v8
+; LMULMAX1-RV64-NEXT:    li a1, 1075
+; LMULMAX1-RV64-NEXT:    slli a1, a1, 52
+; LMULMAX1-RV64-NEXT:    fmv.d.x fa5, a1
+; LMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; LMULMAX1-RV64-NEXT:    fsrmi a1, 2
+; LMULMAX1-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; LMULMAX1-RV64-NEXT:    fsrm a1
+; LMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; LMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    ret
+;
+; ZVFHMINLMULMAX2-RV32-LABEL: floor_v2f64:
+; ZVFHMINLMULMAX2-RV32:       # %bb.0:
+; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    lui a1, %hi(.LCPI126_0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    fld fa5, %lo(.LCPI126_0)(a1)
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX2-RV32-NEXT:    fsrmi a1, 2
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    fsrm a1
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    ret
+;
+; ZVFHMINLMULMAX2-RV64-LABEL: floor_v2f64:
+; ZVFHMINLMULMAX2-RV64:       # %bb.0:
+; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT:    li a1, 1075
+; ZVFHMINLMULMAX2-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMINLMULMAX2-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMINLMULMAX2-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX2-RV64-NEXT:    fsrmi a1, 2
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    fsrm a1
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT:    ret
+;
+; ZVFHMINLMULMAX1-RV32-LABEL: floor_v2f64:
+; ZVFHMINLMULMAX1-RV32:       # %bb.0:
+; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI126_0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    fld fa5, %lo(.LCPI126_0)(a1)
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX1-RV32-NEXT:    fsrmi a1, 2
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    fsrm a1
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    ret
+;
+; ZVFHMINLMULMAX1-RV64-LABEL: floor_v2f64:
+; ZVFHMINLMULMAX1-RV64:       # %bb.0:
+; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT:    li a1, 1075
+; ZVFHMINLMULMAX1-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMINLMULMAX1-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMINLMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX1-RV64-NEXT:    fsrmi a1, 2
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    fsrm a1
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT:    ret
   %a = load <2 x double>, ptr %x
   %b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
   store <2 x double> %b, ptr %x
@@ -5974,9 +6234,10 @@ define void @round_v8f16(ptr %x) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI127_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI127_0)(a1)
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a1, 25
+; ZVFH-NEXT:    slli a1, a1, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a1
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a1, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -6019,10 +6280,11 @@ define void @round_v6f16(ptr %x) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI128_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI128_0)(a1)
 ; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a1, 25
+; ZVFH-NEXT:    slli a1, a1, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a1
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a1, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -6184,22 +6446,110 @@ define void @round_v4f32(ptr %x) {
 declare <4 x float> @llvm.round.v4f32(<4 x float>)
 
 define void @round_v2f64(ptr %x) {
-; CHECK-LABEL: round_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    lui a1, %hi(.LCPI130_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI130_0)(a1)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a1, 4
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
+; LMULMAX1-RV32-LABEL: round_v2f64:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI130_0)
+; LMULMAX1-RV32-NEXT:    fld fa5, %lo(.LCPI130_0)(a1)
+; LMULMAX1-RV32-NEXT:    vfabs.v v9, v8
+; LMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; LMULMAX1-RV32-NEXT:    fsrmi a1, 4
+; LMULMAX1-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; LMULMAX1-RV32-NEXT:    fsrm a1
+; LMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; LMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: round_v2f64:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    vfabs.v v9, v8
+; LMULMAX1-RV64-NEXT:    li a1, 1075
+; LMULMAX1-RV64-NEXT:    slli a1, a1, 52
+; LMULMAX1-RV64-NEXT:    fmv.d.x fa5, a1
+; LMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; LMULMAX1-RV64-NEXT:    fsrmi a1, 4
+; LMULMAX1-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; LMULMAX1-RV64-NEXT:    fsrm a1
+; LMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; LMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    ret
+;
+; ZVFHMINLMULMAX2-RV32-LABEL: round_v2f64:
+; ZVFHMINLMULMAX2-RV32:       # %bb.0:
+; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    lui a1, %hi(.LCPI130_0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    fld fa5, %lo(.LCPI130_0)(a1)
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX2-RV32-NEXT:    fsrmi a1, 4
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    fsrm a1
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    ret
+;
+; ZVFHMINLMULMAX2-RV64-LABEL: round_v2f64:
+; ZVFHMINLMULMAX2-RV64:       # %bb.0:
+; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT:    li a1, 1075
+; ZVFHMINLMULMAX2-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMINLMULMAX2-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMINLMULMAX2-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX2-RV64-NEXT:    fsrmi a1, 4
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    fsrm a1
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT:    ret
+;
+; ZVFHMINLMULMAX1-RV32-LABEL: round_v2f64:
+; ZVFHMINLMULMAX1-RV32:       # %bb.0:
+; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI130_0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    fld fa5, %lo(.LCPI130_0)(a1)
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX1-RV32-NEXT:    fsrmi a1, 4
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    fsrm a1
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    ret
+;
+; ZVFHMINLMULMAX1-RV64-LABEL: round_v2f64:
+; ZVFHMINLMULMAX1-RV64:       # %bb.0:
+; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT:    li a1, 1075
+; ZVFHMINLMULMAX1-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMINLMULMAX1-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMINLMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX1-RV64-NEXT:    fsrmi a1, 4
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    fsrm a1
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT:    ret
   %a = load <2 x double>, ptr %x
   %b = call <2 x double> @llvm.round.v2f64(<2 x double> %a)
   store <2 x double> %b, ptr %x
@@ -6212,9 +6562,10 @@ define void @rint_v8f16(ptr %x) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI131_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI131_0)(a1)
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a1, 25
+; ZVFH-NEXT:    slli a1, a1, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a1
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -6286,20 +6637,98 @@ define void @rint_v4f32(ptr %x) {
 declare <4 x float> @llvm.rint.v4f32(<4 x float>)
 
 define void @rint_v2f64(ptr %x) {
-; CHECK-LABEL: rint_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    lui a1, %hi(.LCPI133_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI133_0)(a1)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
+; LMULMAX1-RV32-LABEL: rint_v2f64:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI133_0)
+; LMULMAX1-RV32-NEXT:    fld fa5, %lo(.LCPI133_0)(a1)
+; LMULMAX1-RV32-NEXT:    vfabs.v v9, v8
+; LMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; LMULMAX1-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; LMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; LMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: rint_v2f64:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    vfabs.v v9, v8
+; LMULMAX1-RV64-NEXT:    li a1, 1075
+; LMULMAX1-RV64-NEXT:    slli a1, a1, 52
+; LMULMAX1-RV64-NEXT:    fmv.d.x fa5, a1
+; LMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; LMULMAX1-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; LMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; LMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    ret
+;
+; ZVFHMINLMULMAX2-RV32-LABEL: rint_v2f64:
+; ZVFHMINLMULMAX2-RV32:       # %bb.0:
+; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    lui a1, %hi(.LCPI133_0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    fld fa5, %lo(.LCPI133_0)(a1)
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    ret
+;
+; ZVFHMINLMULMAX2-RV64-LABEL: rint_v2f64:
+; ZVFHMINLMULMAX2-RV64:       # %bb.0:
+; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT:    li a1, 1075
+; ZVFHMINLMULMAX2-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMINLMULMAX2-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMINLMULMAX2-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT:    ret
+;
+; ZVFHMINLMULMAX1-RV32-LABEL: rint_v2f64:
+; ZVFHMINLMULMAX1-RV32:       # %bb.0:
+; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI133_0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    fld fa5, %lo(.LCPI133_0)(a1)
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    ret
+;
+; ZVFHMINLMULMAX1-RV64-LABEL: rint_v2f64:
+; ZVFHMINLMULMAX1-RV64:       # %bb.0:
+; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT:    li a1, 1075
+; ZVFHMINLMULMAX1-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMINLMULMAX1-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMINLMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT:    ret
   %a = load <2 x double>, ptr %x
   %b = call <2 x double> @llvm.rint.v2f64(<2 x double> %a)
   store <2 x double> %b, ptr %x
@@ -6312,9 +6741,10 @@ define void @nearbyint_v8f16(ptr %x) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI134_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI134_0)(a1)
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a1, 25
+; ZVFH-NEXT:    slli a1, a1, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a1
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    frflags a1
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -6394,22 +6824,110 @@ define void @nearbyint_v4f32(ptr %x) {
 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
 
 define void @nearbyint_v2f64(ptr %x) {
-; CHECK-LABEL: nearbyint_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    lui a1, %hi(.LCPI136_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI136_0)(a1)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    frflags a1
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
+; LMULMAX1-RV32-LABEL: nearbyint_v2f64:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI136_0)
+; LMULMAX1-RV32-NEXT:    fld fa5, %lo(.LCPI136_0)(a1)
+; LMULMAX1-RV32-NEXT:    vfabs.v v9, v8
+; LMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; LMULMAX1-RV32-NEXT:    frflags a1
+; LMULMAX1-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; LMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; LMULMAX1-RV32-NEXT:    fsflags a1
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; LMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: nearbyint_v2f64:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    vfabs.v v9, v8
+; LMULMAX1-RV64-NEXT:    li a1, 1075
+; LMULMAX1-RV64-NEXT:    slli a1, a1, 52
+; LMULMAX1-RV64-NEXT:    fmv.d.x fa5, a1
+; LMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; LMULMAX1-RV64-NEXT:    frflags a1
+; LMULMAX1-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; LMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; LMULMAX1-RV64-NEXT:    fsflags a1
+; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; LMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    ret
+;
+; ZVFHMINLMULMAX2-RV32-LABEL: nearbyint_v2f64:
+; ZVFHMINLMULMAX2-RV32:       # %bb.0:
+; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    lui a1, %hi(.LCPI136_0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    fld fa5, %lo(.LCPI136_0)(a1)
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX2-RV32-NEXT:    frflags a1
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    fsflags a1
+; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX2-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT:    ret
+;
+; ZVFHMINLMULMAX2-RV64-LABEL: nearbyint_v2f64:
+; ZVFHMINLMULMAX2-RV64:       # %bb.0:
+; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT:    li a1, 1075
+; ZVFHMINLMULMAX2-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMINLMULMAX2-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMINLMULMAX2-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX2-RV64-NEXT:    frflags a1
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    fsflags a1
+; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX2-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT:    ret
+;
+; ZVFHMINLMULMAX1-RV32-LABEL: nearbyint_v2f64:
+; ZVFHMINLMULMAX1-RV32:       # %bb.0:
+; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI136_0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    fld fa5, %lo(.LCPI136_0)(a1)
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX1-RV32-NEXT:    frflags a1
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    fsflags a1
+; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT:    ret
+;
+; ZVFHMINLMULMAX1-RV64-LABEL: nearbyint_v2f64:
+; ZVFHMINLMULMAX1-RV64:       # %bb.0:
+; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT:    li a1, 1075
+; ZVFHMINLMULMAX1-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMINLMULMAX1-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMINLMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMINLMULMAX1-RV64-NEXT:    frflags a1
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    fsflags a1
+; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMINLMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT:    ret
   %a = load <2 x double>, ptr %x
   %b = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
   store <2 x double> %b, ptr %x
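
The repeated `li`/`slli`/`fmv.h.x`/`fmv.d.x` sequences in the updated checks above materialize the rounding threshold directly in an integer register instead of loading it from a constant pool: 25 << 10 is the fp16 bit pattern of 2^10 and 1075 << 52 is the fp64 bit pattern of 2^52, the magnitudes at and above which every finite value of that type is already an integer, so only smaller elements need the masked convert. A minimal decoding sketch (plain Python, standard library only; variable names are illustrative, not from the patch):

import struct

half_bits = 25 << 10         # li a0, 25  ; slli a0, a0, 10
double_bits = 1075 << 52     # li a0, 1075 ; slli a0, a0, 52

half_val, = struct.unpack('<e', half_bits.to_bytes(2, 'little'))
double_val, = struct.unpack('<d', double_bits.to_bytes(8, 'little'))

print(hex(half_bits), half_val)      # 0x6400 1024.0 (2^10)
print(hex(double_bits), double_val)  # 0x4330000000000000 4503599627370496.0 (2^52)
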
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index 645b53727a059b9..374a086e6e0e6d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -222,13 +222,15 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) {
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    vslidedown.vi v9, v8, 1
 ; RV64-NEXT:    vfmv.f.s fa5, v9
-; RV64-NEXT:    lui a0, %hi(.LCPI10_0)
-; RV64-NEXT:    fld fa4, %lo(.LCPI10_0)(a0)
-; RV64-NEXT:    lui a0, %hi(.LCPI10_1)
-; RV64-NEXT:    fld fa3, %lo(.LCPI10_1)(a0)
 ; RV64-NEXT:    feq.d a0, fa5, fa5
 ; RV64-NEXT:    neg a0, a0
+; RV64-NEXT:    li a2, -509
+; RV64-NEXT:    slli a2, a2, 53
+; RV64-NEXT:    fmv.d.x fa4, a2
 ; RV64-NEXT:    fmax.d fa5, fa5, fa4
+; RV64-NEXT:    lui a2, 65919
+; RV64-NEXT:    slli a2, a2, 34
+; RV64-NEXT:    fmv.d.x fa3, a2
 ; RV64-NEXT:    fmin.d fa5, fa5, fa3
 ; RV64-NEXT:    fcvt.l.d a2, fa5, rtz
 ; RV64-NEXT:    and a0, a0, a2
@@ -280,20 +282,21 @@ define void @fp2ui_v2f64_v2i8(ptr %x, ptr %y) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    lui a0, %hi(.LCPI11_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; RV64-NEXT:    vfmv.f.s fa4, v8
-; RV64-NEXT:    fmv.d.x fa3, zero
-; RV64-NEXT:    fmax.d fa4, fa4, fa3
-; RV64-NEXT:    fmin.d fa4, fa4, fa5
-; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
+; RV64-NEXT:    vfmv.f.s fa5, v8
+; RV64-NEXT:    fmv.d.x fa4, zero
+; RV64-NEXT:    fmax.d fa5, fa5, fa4
+; RV64-NEXT:    lui a0, 131967
+; RV64-NEXT:    slli a0, a0, 33
+; RV64-NEXT:    fmv.d.x fa3, a0
+; RV64-NEXT:    fmin.d fa5, fa5, fa3
+; RV64-NEXT:    fcvt.lu.d a0, fa5, rtz
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV64-NEXT:    vslide1down.vx v9, v8, a0
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vslidedown.vi v8, v8, 1
-; RV64-NEXT:    vfmv.f.s fa4, v8
-; RV64-NEXT:    fmax.d fa4, fa4, fa3
-; RV64-NEXT:    fmin.d fa5, fa4, fa5
+; RV64-NEXT:    vfmv.f.s fa5, v8
+; RV64-NEXT:    fmax.d fa5, fa5, fa4
+; RV64-NEXT:    fmin.d fa5, fa5, fa3
 ; RV64-NEXT:    fcvt.lu.d a0, fa5, rtz
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; RV64-NEXT:    vslide1down.vx v8, v9, a0
@@ -424,14 +427,16 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV64-NEXT:    vse64.v v8, (a0)
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vslidedown.vi v10, v8, 1
-; RV64-NEXT:    vfmv.f.s fa3, v10
-; RV64-NEXT:    lui a0, %hi(.LCPI12_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; RV64-NEXT:    lui a0, %hi(.LCPI12_1)
-; RV64-NEXT:    fld fa4, %lo(.LCPI12_1)(a0)
-; RV64-NEXT:    feq.d a0, fa3, fa3
+; RV64-NEXT:    vfmv.f.s fa4, v10
+; RV64-NEXT:    feq.d a0, fa4, fa4
 ; RV64-NEXT:    neg a0, a0
-; RV64-NEXT:    fmax.d fa3, fa3, fa5
+; RV64-NEXT:    li a2, -509
+; RV64-NEXT:    slli a2, a2, 53
+; RV64-NEXT:    fmv.d.x fa5, a2
+; RV64-NEXT:    fmax.d fa3, fa4, fa5
+; RV64-NEXT:    lui a2, 65919
+; RV64-NEXT:    slli a2, a2, 34
+; RV64-NEXT:    fmv.d.x fa4, a2
 ; RV64-NEXT:    fmin.d fa3, fa3, fa4
 ; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
 ; RV64-NEXT:    and a0, a0, a2
@@ -604,57 +609,58 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    mv a0, sp
 ; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    lui a0, %hi(.LCPI13_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; RV64-NEXT:    vfmv.f.s fa4, v8
-; RV64-NEXT:    fmv.d.x fa3, zero
-; RV64-NEXT:    fmax.d fa4, fa4, fa3
-; RV64-NEXT:    fmin.d fa4, fa4, fa5
-; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
+; RV64-NEXT:    vfmv.f.s fa5, v8
+; RV64-NEXT:    fmv.d.x fa4, zero
+; RV64-NEXT:    fmax.d fa5, fa5, fa4
+; RV64-NEXT:    lui a0, 131967
+; RV64-NEXT:    slli a0, a0, 33
+; RV64-NEXT:    fmv.d.x fa3, a0
+; RV64-NEXT:    fmin.d fa5, fa5, fa3
+; RV64-NEXT:    fcvt.lu.d a0, fa5, rtz
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vslide1down.vx v10, v8, a0
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vslidedown.vi v11, v8, 1
-; RV64-NEXT:    vfmv.f.s fa4, v11
-; RV64-NEXT:    fmax.d fa4, fa4, fa3
-; RV64-NEXT:    fmin.d fa4, fa4, fa5
-; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
+; RV64-NEXT:    vfmv.f.s fa5, v11
+; RV64-NEXT:    fmax.d fa5, fa5, fa4
+; RV64-NEXT:    fmin.d fa5, fa5, fa3
+; RV64-NEXT:    fcvt.lu.d a0, fa5, rtz
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; RV64-NEXT:    vslidedown.vi v12, v8, 2
-; RV64-NEXT:    vfmv.f.s fa4, v12
-; RV64-NEXT:    fmax.d fa4, fa4, fa3
-; RV64-NEXT:    fmin.d fa4, fa4, fa5
-; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
+; RV64-NEXT:    vfmv.f.s fa5, v12
+; RV64-NEXT:    fmax.d fa5, fa5, fa4
+; RV64-NEXT:    fmin.d fa5, fa5, fa3
+; RV64-NEXT:    fcvt.lu.d a0, fa5, rtz
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; RV64-NEXT:    vslidedown.vi v8, v8, 3
-; RV64-NEXT:    vfmv.f.s fa4, v8
-; RV64-NEXT:    fmax.d fa4, fa4, fa3
+; RV64-NEXT:    vfmv.f.s fa5, v8
+; RV64-NEXT:    fmax.d fa5, fa5, fa4
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    fld fa2, 32(sp)
-; RV64-NEXT:    fmin.d fa4, fa4, fa5
-; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT:    fld fa4, 40(sp)
-; RV64-NEXT:    fmax.d fa2, fa2, fa3
-; RV64-NEXT:    fmin.d fa2, fa2, fa5
+; RV64-NEXT:    fmin.d fa5, fa5, fa3
+; RV64-NEXT:    fcvt.lu.d a0, fa5, rtz
+; RV64-NEXT:    fld fa5, 40(sp)
+; RV64-NEXT:    fmax.d fa2, fa2, fa4
+; RV64-NEXT:    fmin.d fa2, fa2, fa3
 ; RV64-NEXT:    fcvt.lu.d a2, fa2, rtz
-; RV64-NEXT:    fmax.d fa4, fa4, fa3
+; RV64-NEXT:    fmax.d fa5, fa5, fa4
 ; RV64-NEXT:    fld fa2, 48(sp)
-; RV64-NEXT:    fmin.d fa4, fa4, fa5
-; RV64-NEXT:    fcvt.lu.d a3, fa4, rtz
+; RV64-NEXT:    fmin.d fa5, fa5, fa3
+; RV64-NEXT:    fcvt.lu.d a3, fa5, rtz
 ; RV64-NEXT:    vslide1down.vx v8, v10, a0
-; RV64-NEXT:    fmax.d fa4, fa2, fa3
-; RV64-NEXT:    fmin.d fa4, fa4, fa5
-; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT:    fld fa4, 56(sp)
+; RV64-NEXT:    fmax.d fa5, fa2, fa4
+; RV64-NEXT:    fmin.d fa5, fa5, fa3
+; RV64-NEXT:    fcvt.lu.d a0, fa5, rtz
+; RV64-NEXT:    fld fa5, 56(sp)
 ; RV64-NEXT:    vslide1down.vx v8, v8, a2
 ; RV64-NEXT:    vslide1down.vx v8, v8, a3
 ; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    fmax.d fa4, fa4, fa3
-; RV64-NEXT:    fmin.d fa5, fa4, fa5
+; RV64-NEXT:    fmax.d fa5, fa5, fa4
+; RV64-NEXT:    fmin.d fa5, fa5, fa3
 ; RV64-NEXT:    fcvt.lu.d a0, fa5, rtz
 ; RV64-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-NEXT:    vse8.v v8, (a1)
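
The fp2i-sat changes above apply the same idea to the clamp constants: the integer immediates are the raw IEEE-754 bit patterns of the i8 saturation bounds (keep in mind that `lui` already shifts its 20-bit immediate left by 12 before the extra `slli`). A small decoding sketch (plain Python; the helper name is illustrative, not from the patch):

import struct

def as_double(bits):
    # Interpret the low 64 bits as an IEEE-754 double, like fmv.d.x does.
    return struct.unpack('<d', (bits & (2**64 - 1)).to_bytes(8, 'little'))[0]

print(as_double(-509 << 53))          # -128.0, signed i8 lower bound
print(as_double(65919 << 12 << 34))   #  127.0, signed i8 upper bound
print(as_double(131967 << 12 << 33))  #  255.0, unsigned i8 upper bound
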
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll
index f189354237ee3ae..a1ca148b03e9d10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; This file tests the code generation for `llvm.experimental.constrained.round.*` on fixed vector types.
 
@@ -11,10 +11,11 @@ define <1 x half> @round_v1f16(<1 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -33,10 +34,11 @@ define <2 x half> @round_v2f16(<2 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -55,10 +57,11 @@ define <4 x half> @round_v4f16(<4 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -77,10 +80,11 @@ define <8 x half> @round_v8f16(<8 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -99,10 +103,11 @@ define <16 x half> @round_v16f16(<16 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -122,10 +127,11 @@ define <32 x half> @round_v32f16(<32 x half> %x) strictfp {
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -250,88 +256,160 @@ define <16 x float> @round_v16f32(<16 x float> %x) strictfp {
 declare <16 x float> @llvm.experimental.constrained.round.v16f32(<16 x float>, metadata)
 
 define <1 x double> @round_v1f64(<1 x double> %x) strictfp {
-; CHECK-LABEL: round_v1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: round_v1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 4
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: round_v1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 4
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double> %x, metadata !"fpexcept.strict")
   ret <1 x double> %a
 }
 declare <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double>, metadata)
 
 define <2 x double> @round_v2f64(<2 x double> %x) strictfp {
-; CHECK-LABEL: round_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: round_v2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 4
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: round_v2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 4
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double> %x, metadata !"fpexcept.strict")
   ret <2 x double> %a
 }
 declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata)
 
 define <4 x double> @round_v4f64(<4 x double> %x) strictfp {
-; CHECK-LABEL: round_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: round_v4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    fsrmi a0, 4
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: round_v4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    fsrmi a0, 4
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double> %x, metadata !"fpexcept.strict")
   ret <4 x double> %a
 }
 declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata)
 
 define <8 x double> @round_v8f64(<8 x double> %x) strictfp {
-; CHECK-LABEL: round_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: round_v8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    fsrmi a0, 4
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: round_v8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    fsrmi a0, 4
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <8 x double> @llvm.experimental.constrained.round.v8f64(<8 x double> %x, metadata !"fpexcept.strict")
   ret <8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll
index 3f1bc03435840be..506c1c53290dea5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll
@@ -1,22 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH-RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64
 
 ; This file tests the code generation for `llvm.round.*` on fixed vector types.
 
 define <1 x half> @round_v1f16(<1 x half> %x) {
 ; ZVFH-LABEL: round_v1f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a0, %hi(.LCPI0_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; ZVFH-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -52,10 +53,11 @@ declare <1 x half> @llvm.round.v1f16(<1 x half>)
 define <2 x half> @round_v2f16(<2 x half> %x) {
 ; ZVFH-LABEL: round_v2f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a0, %hi(.LCPI1_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -91,10 +93,11 @@ declare <2 x half> @llvm.round.v2f16(<2 x half>)
 define <4 x half> @round_v4f16(<4 x half> %x) {
 ; ZVFH-LABEL: round_v4f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a0, %hi(.LCPI2_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -130,10 +133,11 @@ declare <4 x half> @llvm.round.v4f16(<4 x half>)
 define <8 x half> @round_v8f16(<8 x half> %x) {
 ; ZVFH-LABEL: round_v8f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a0, %hi(.LCPI3_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -169,10 +173,11 @@ declare <8 x half> @llvm.round.v8f16(<8 x half>)
 define <16 x half> @round_v16f16(<16 x half> %x) {
 ; ZVFH-LABEL: round_v16f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a0, %hi(.LCPI4_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v10, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -208,11 +213,12 @@ declare <16 x half> @llvm.round.v16f16(<16 x half>)
 define <32 x half> @round_v32f16(<32 x half> %x) {
 ; ZVFH-LABEL: round_v32f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a0, %hi(.LCPI5_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; ZVFH-NEXT:    li a0, 32
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -347,80 +353,268 @@ define <16 x float> @round_v16f32(<16 x float> %x) {
 declare <16 x float> @llvm.round.v16f32(<16 x float>)
 
 define <1 x double> @round_v1f64(<1 x double> %x) {
-; CHECK-LABEL: round_v1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: round_v1f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; ZVFH-RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v9, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: round_v1f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v9, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: round_v1f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: round_v1f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %a = call <1 x double> @llvm.round.v1f64(<1 x double> %x)
   ret <1 x double> %a
 }
 declare <1 x double> @llvm.round.v1f64(<1 x double>)
 
 define <2 x double> @round_v2f64(<2 x double> %x) {
-; CHECK-LABEL: round_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: round_v2f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; ZVFH-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v9, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: round_v2f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v9, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: round_v2f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: round_v2f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %a = call <2 x double> @llvm.round.v2f64(<2 x double> %x)
   ret <2 x double> %a
 }
 declare <2 x double> @llvm.round.v2f64(<2 x double>)
 
 define <4 x double> @round_v4f64(<4 x double> %x) {
-; CHECK-LABEL: round_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: round_v4f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; ZVFH-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v10, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: round_v4f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v10, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: round_v4f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v10, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: round_v4f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v10, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %a = call <4 x double> @llvm.round.v4f64(<4 x double> %x)
   ret <4 x double> %a
 }
 declare <4 x double> @llvm.round.v4f64(<4 x double>)
 
 define <8 x double> @round_v8f64(<8 x double> %x) {
-; CHECK-LABEL: round_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: round_v8f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; ZVFH-RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v12, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: round_v8f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v12, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: round_v8f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v12, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: round_v8f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v12, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %a = call <8 x double> @llvm.round.v8f64(<8 x double> %x)
   ret <8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll
index 11920c7c31c9817..c684dfb74fd8e61 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; This file tests the code generation for `llvm.experimental.constrained.roundeven.*` on fixed vector types.
 
@@ -11,10 +11,11 @@ define <1 x half> @roundeven_v1f16(<1 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -33,10 +34,11 @@ define <2 x half> @roundeven_v2f16(<2 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -55,10 +57,11 @@ define <4 x half> @roundeven_v4f16(<4 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -77,10 +80,11 @@ define <8 x half> @roundeven_v8f16(<8 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -99,10 +103,11 @@ define <16 x half> @roundeven_v16f16(<16 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -122,10 +127,11 @@ define <32 x half> @roundeven_v32f16(<32 x half> %x) strictfp {
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -250,88 +256,160 @@ define <16 x float> @roundeven_v16f32(<16 x float> %x) strictfp {
 declare <16 x float> @llvm.experimental.constrained.roundeven.v16f32(<16 x float>, metadata)
 
 define <1 x double> @roundeven_v1f64(<1 x double> %x) strictfp {
-; CHECK-LABEL: roundeven_v1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: roundeven_v1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 0
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: roundeven_v1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 0
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <1 x double> @llvm.experimental.constrained.roundeven.v1f64(<1 x double> %x, metadata !"fpexcept.strict")
   ret <1 x double> %a
 }
 declare <1 x double> @llvm.experimental.constrained.roundeven.v1f64(<1 x double>, metadata)
 
 define <2 x double> @roundeven_v2f64(<2 x double> %x) strictfp {
-; CHECK-LABEL: roundeven_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: roundeven_v2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 0
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: roundeven_v2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 0
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> %x, metadata !"fpexcept.strict")
   ret <2 x double> %a
 }
 declare <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double>, metadata)
 
 define <4 x double> @roundeven_v4f64(<4 x double> %x) strictfp {
-; CHECK-LABEL: roundeven_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: roundeven_v4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    fsrmi a0, 0
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: roundeven_v4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    fsrmi a0, 0
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <4 x double> @llvm.experimental.constrained.roundeven.v4f64(<4 x double> %x, metadata !"fpexcept.strict")
   ret <4 x double> %a
 }
 declare <4 x double> @llvm.experimental.constrained.roundeven.v4f64(<4 x double>, metadata)
 
 define <8 x double> @roundeven_v8f64(<8 x double> %x) strictfp {
-; CHECK-LABEL: roundeven_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: roundeven_v8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    fsrmi a0, 0
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: roundeven_v8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    fsrmi a0, 0
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <8 x double> @llvm.experimental.constrained.roundeven.v8f64(<8 x double> %x, metadata !"fpexcept.strict")
   ret <8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll
index 9607aa09d89d680..b977654ae80b902 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll
@@ -1,22 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH-RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64
 
 ; This file tests the code generation for `llvm.roundeven.*` on fixed vector type.
 
 define <1 x half> @roundeven_v1f16(<1 x half> %x) {
 ; ZVFH-LABEL: roundeven_v1f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a0, %hi(.LCPI0_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; ZVFH-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -52,10 +53,11 @@ declare <1 x half> @llvm.roundeven.v1f16(<1 x half>)
 define <2 x half> @roundeven_v2f16(<2 x half> %x) {
 ; ZVFH-LABEL: roundeven_v2f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a0, %hi(.LCPI1_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -91,10 +93,11 @@ declare <2 x half> @llvm.roundeven.v2f16(<2 x half>)
 define <4 x half> @roundeven_v4f16(<4 x half> %x) {
 ; ZVFH-LABEL: roundeven_v4f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a0, %hi(.LCPI2_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -130,10 +133,11 @@ declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
 define <8 x half> @roundeven_v8f16(<8 x half> %x) {
 ; ZVFH-LABEL: roundeven_v8f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a0, %hi(.LCPI3_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -169,10 +173,11 @@ declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
 define <16 x half> @roundeven_v16f16(<16 x half> %x) {
 ; ZVFH-LABEL: roundeven_v16f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a0, %hi(.LCPI4_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v10, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -208,11 +213,12 @@ declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
 define <32 x half> @roundeven_v32f16(<32 x half> %x) {
 ; ZVFH-LABEL: roundeven_v32f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a0, %hi(.LCPI5_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; ZVFH-NEXT:    li a0, 32
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -347,80 +353,268 @@ define <16 x float> @roundeven_v16f32(<16 x float> %x) {
 declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
 
 define <1 x double> @roundeven_v1f64(<1 x double> %x) {
-; CHECK-LABEL: roundeven_v1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: roundeven_v1f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; ZVFH-RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v9, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 0
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: roundeven_v1f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v9, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 0
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: roundeven_v1f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 0
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: roundeven_v1f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 0
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %a = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %x)
   ret <1 x double> %a
 }
 declare <1 x double> @llvm.roundeven.v1f64(<1 x double>)
 
 define <2 x double> @roundeven_v2f64(<2 x double> %x) {
-; CHECK-LABEL: roundeven_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: roundeven_v2f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; ZVFH-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v9, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 0
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: roundeven_v2f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v9, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 0
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: roundeven_v2f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 0
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: roundeven_v2f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 0
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %a = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x)
   ret <2 x double> %a
 }
 declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
 
 define <4 x double> @roundeven_v4f64(<4 x double> %x) {
-; CHECK-LABEL: roundeven_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: roundeven_v4f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; ZVFH-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v10, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 0
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: roundeven_v4f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v10, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 0
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: roundeven_v4f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v10, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 0
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: roundeven_v4f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v10, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 0
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %a = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %x)
   ret <4 x double> %a
 }
 declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
 
 define <8 x double> @roundeven_v8f64(<8 x double> %x) {
-; CHECK-LABEL: roundeven_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: roundeven_v8f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; ZVFH-RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v12, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 0
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: roundeven_v8f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v12, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 0
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: roundeven_v8f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v12, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 0
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: roundeven_v8f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v12, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 0
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %a = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %x)
   ret <8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll
index f16581444afca5d..dc0278655dad8d2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll
@@ -9,10 +9,11 @@ define <1 x half> @trunc_v1f16(<1 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -29,10 +30,11 @@ define <2 x half> @trunc_v2f16(<2 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -49,10 +51,11 @@ define <4 x half> @trunc_v4f16(<4 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -69,10 +72,11 @@ define <8 x half> @trunc_v8f16(<8 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -89,10 +93,11 @@ define <16 x half> @trunc_v16f16(<16 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
@@ -110,10 +115,11 @@ define <32 x half> @trunc_v32f16(<32 x half> %x) strictfp {
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -226,80 +232,24 @@ define <16 x float> @trunc_v16f32(<16 x float> %x) strictfp {
 declare <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float>, metadata)
 
 define <1 x double> @trunc_v1f64(<1 x double> %x) strictfp {
-; CHECK-LABEL: trunc_v1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double> %x, metadata !"fpexcept.strict")
   ret <1 x double> %a
 }
 declare <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double>, metadata)
 
 define <2 x double> @trunc_v2f64(<2 x double> %x) strictfp {
-; CHECK-LABEL: trunc_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double> %x, metadata !"fpexcept.strict")
   ret <2 x double> %a
 }
 declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata)
 
 define <4 x double> @trunc_v4f64(<4 x double> %x) strictfp {
-; CHECK-LABEL: trunc_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double> %x, metadata !"fpexcept.strict")
   ret <4 x double> %a
 }
 declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata)
 
 define <8 x double> @trunc_v8f64(<8 x double> %x) strictfp {
-; CHECK-LABEL: trunc_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double> %x, metadata !"fpexcept.strict")
   ret <8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
index d9958f4aae35003..0a17f7dca769658 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
@@ -9,10 +9,11 @@ declare <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half>, <2 x i1>, i32)
 define <2 x half> @vp_nearbyint_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
@@ -30,10 +31,11 @@ define <2 x half> @vp_nearbyint_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %
 define <2 x half> @vp_nearbyint_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v2f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -53,10 +55,11 @@ declare <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half>, <4 x i1>, i32)
 define <4 x half> @vp_nearbyint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
@@ -74,10 +77,11 @@ define <4 x half> @vp_nearbyint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %
 define <4 x half> @vp_nearbyint_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v4f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +101,11 @@ declare <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half>, <8 x i1>, i32)
 define <8 x half> @vp_nearbyint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
@@ -118,10 +123,11 @@ define <8 x half> @vp_nearbyint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %
 define <8 x half> @vp_nearbyint_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v8f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -142,10 +148,11 @@ define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe
 ; CHECK-LABEL: vp_nearbyint_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI6_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
@@ -164,10 +171,11 @@ define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe
 define <16 x half> @vp_nearbyint_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v16f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI7_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -365,41 +373,11 @@ define <16 x float> @vp_nearbyint_v16f32_unmasked(<16 x float> %va, i32 zeroext
 declare <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double>, <2 x i1>, i32)
 
 define <2 x double> @vp_nearbyint_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI16_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
   ret <2 x double> %v
 }
 
 define <2 x double> @vp_nearbyint_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_v2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI17_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
@@ -409,43 +387,11 @@ define <2 x double> @vp_nearbyint_v2f64_unmasked(<2 x double> %va, i32 zeroext %
 declare <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double>, <4 x i1>, i32)
 
 define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
 }
 
 define <4 x double> @vp_nearbyint_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_v4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI19_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
@@ -455,43 +401,11 @@ define <4 x double> @vp_nearbyint_v4f64_unmasked(<4 x double> %va, i32 zeroext %
 declare <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double>, <8 x i1>, i32)
 
 define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
 }
 
 define <8 x double> @vp_nearbyint_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_v8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI21_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
@@ -501,43 +415,11 @@ define <8 x double> @vp_nearbyint_v8f64_unmasked(<8 x double> %va, i32 zeroext %
 declare <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double>, <15 x i1>, i32)
 
 define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_v15f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
 }
 
 define <15 x double> @vp_nearbyint_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_v15f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <15 x i1> poison, i1 true, i32 0
   %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
   %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
@@ -547,43 +429,11 @@ define <15 x double> @vp_nearbyint_v15f64_unmasked(<15 x double> %va, i32 zeroex
 declare <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double>, <16 x i1>, i32)
 
 define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_v16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
 }
 
 define <16 x double> @vp_nearbyint_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_v16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
@@ -593,104 +443,11 @@ define <16 x double> @vp_nearbyint_v16f64_unmasked(<16 x double> %va, i32 zeroex
 declare <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double>, <32 x i1>, i32)
 
 define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_v32f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    vslidedown.vi v1, v0, 2
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB26_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
-; CHECK-NEXT:    frflags a1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    fsflags a1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v1
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v16, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v1
-; CHECK-NEXT:    vmflt.vf v1, v16, fa5, v0.t
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v1
-; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; CHECK-NEXT:    vmv.v.v v16, v24
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
   %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
   ret <32 x double> %v
 }
 
 define <32 x double> @vp_nearbyint_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_v32f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB27_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB27_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    frflags a1
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    fsflags a1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <32 x i1> poison, i1 true, i32 0
   %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
   %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
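
The e16 (half) check lines earlier in this file use the same trick with
li a0, 25 ; slli a0, a0, 10 ; fmv.h.x fa5, a0, giving the bit pattern
0x6400. A minimal host-side C++ sketch decoding it (not part of the
patch; it assumes the standard binary16 encoding and is only valid for
the positive, zero-mantissa patterns used here):

  // 25 << 10 == 0x6400: biased exponent 25 = 15 + 10, zero mantissa,
  // i.e. the half-precision value 2^10 = 1024.0 compared against |x|.
  #include <cmath>
  #include <cstdint>
  #include <cstdio>
  int main() {
    uint16_t bits = 25u << 10;                  // 0x6400
    int exp = int(bits >> 10) - 15;             // unbias the exponent field
    std::printf("%g\n", std::ldexp(1.0, exp));  // prints 1024
    return 0;
  }
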
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
index 3e0fb3009c6b193..49d56bf0545b53a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
@@ -1,18 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <2 x half> @llvm.vp.rint.v2f16(<2 x half>, <2 x i1>, i32)
 
 define <2 x half> @vp_rint_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
@@ -28,10 +29,11 @@ define <2 x half> @vp_rint_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl)
 define <2 x half> @vp_rint_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v2f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -49,10 +51,11 @@ declare <4 x half> @llvm.vp.rint.v4f16(<4 x half>, <4 x i1>, i32)
 define <4 x half> @vp_rint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
@@ -68,10 +71,11 @@ define <4 x half> @vp_rint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl)
 define <4 x half> @vp_rint_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v4f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -89,10 +93,11 @@ declare <8 x half> @llvm.vp.rint.v8f16(<8 x half>, <8 x i1>, i32)
 define <8 x half> @vp_rint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -108,10 +113,11 @@ define <8 x half> @vp_rint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
 define <8 x half> @vp_rint_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v8f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -130,10 +136,11 @@ define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e
 ; CHECK-LABEL: vp_rint_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI6_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -150,10 +157,11 @@ define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e
 define <16 x half> @vp_rint_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v16f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI7_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
@@ -333,37 +341,67 @@ define <16 x float> @vp_rint_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl)
 declare <2 x double> @llvm.vp.rint.v2f64(<2 x double>, <2 x i1>, i32)
 
 define <2 x double> @vp_rint_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI16_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_rint_v2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI16_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vfabs.v v9, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_rint_v2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vfabs.v v9, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
   ret <2 x double> %v
 }
 
 define <2 x double> @vp_rint_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_v2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI17_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_rint_v2f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI17_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_rint_v2f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
@@ -373,39 +411,71 @@ define <2 x double> @vp_rint_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl)
 declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32)
 
 define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_rint_v4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v10, v0
+; RV32-NEXT:    lui a1, %hi(.LCPI18_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vfabs.v v12, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_rint_v4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v10, v0
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-NEXT:    vfabs.v v12, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
 }
 
 define <4 x double> @vp_rint_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_v4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI19_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_rint_v4f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI19_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_rint_v4f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
@@ -415,39 +485,71 @@ define <4 x double> @vp_rint_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl)
 declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32)
 
 define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_rint_v8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    lui a1, %hi(.LCPI20_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vfabs.v v16, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_rint_v8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV64-NEXT:    vfabs.v v16, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
 }
 
 define <8 x double> @vp_rint_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_v8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI21_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_rint_v8f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI21_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_rint_v8f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
@@ -457,39 +559,71 @@ define <8 x double> @vp_rint_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl)
 declare <15 x double> @llvm.vp.rint.v15f64(<15 x double>, <15 x i1>, i32)
 
 define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_v15f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_rint_v15f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v16, v0
+; RV32-NEXT:    lui a1, %hi(.LCPI22_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v24, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_rint_v15f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v16, v0
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v24, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
 }
 
 define <15 x double> @vp_rint_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_v15f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_rint_v15f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI23_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v16, v8
+; RV32-NEXT:    vmflt.vf v0, v16, fa5
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_rint_v15f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v16, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v16, fa5
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <15 x i1> poison, i1 true, i32 0
   %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
   %v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
@@ -499,39 +633,71 @@ define <15 x double> @vp_rint_v15f64_unmasked(<15 x double> %va, i32 zeroext %ev
 declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32)
 
 define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_v16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_rint_v16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v16, v0
+; RV32-NEXT:    lui a1, %hi(.LCPI24_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v24, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_rint_v16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v16, v0
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v24, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
 }
 
 define <16 x double> @vp_rint_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_v16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_rint_v16f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI25_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v16, v8
+; RV32-NEXT:    vmflt.vf v0, v16, fa5
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_rint_v16f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v16, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v16, fa5
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
@@ -541,104 +707,201 @@ define <16 x double> @vp_rint_v16f64_unmasked(<16 x double> %va, i32 zeroext %ev
 declare <32 x double> @llvm.vp.rint.v32f64(<32 x double>, <32 x i1>, i32)
 
 define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_v32f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    vslidedown.vi v24, v0, 2
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB26_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v8, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_rint_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV32-NEXT:    vmv1r.v v25, v0
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    vslidedown.vi v24, v0, 2
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltu a0, a2, .LBB26_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB26_2:
+; RV32-NEXT:    lui a2, %hi(.LCPI26_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v25
+; RV32-NEXT:    vfabs.v v16, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v25
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a1, a0, -16
+; RV32-NEXT:    sltu a0, a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vfabs.v v8, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmflt.vf v24, v8, fa5, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v24
+; RV32-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_rint_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT:    vmv1r.v v25, v0
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    bltu a0, a2, .LBB26_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB26_2:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v25
+; RV64-NEXT:    vfabs.v v16, v8, v0.t
+; RV64-NEXT:    li a1, 1075
+; RV64-NEXT:    slli a1, a1, 52
+; RV64-NEXT:    fmv.d.x fa5, a1
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v25
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a1, a0, -16
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    addi a0, a0, -1
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vfabs.v v8, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmflt.vf v24, v8, fa5, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
   %v = call <32 x double> @llvm.vp.rint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
   ret <32 x double> %v
 }
 
 define <32 x double> @vp_rint_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_v32f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB27_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB27_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_rint_v32f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltu a0, a2, .LBB27_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB27_2:
+; RV32-NEXT:    lui a2, %hi(.LCPI27_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v24, v8
+; RV32-NEXT:    vmflt.vf v0, v24, fa5
+; RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32-NEXT:    addi a1, a0, -16
+; RV32-NEXT:    sltu a0, a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v24, v16
+; RV32-NEXT:    vmflt.vf v0, v24, fa5
+; RV32-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_rint_v32f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    bltu a0, a2, .LBB27_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB27_2:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v24, v8
+; RV64-NEXT:    li a1, 1075
+; RV64-NEXT:    slli a1, a1, 52
+; RV64-NEXT:    fmv.d.x fa5, a1
+; RV64-NEXT:    vmflt.vf v0, v24, fa5
+; RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64-NEXT:    addi a1, a0, -16
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    addi a0, a0, -1
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v24, v16
+; RV64-NEXT:    vmflt.vf v0, v24, fa5
+; RV64-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <32 x i1> poison, i1 true, i32 0
   %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
   %v = call <32 x double> @llvm.vp.rint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
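
(Likewise for the e16 checks above: the half-precision threshold built by "li a0, 25 ; slli a0, a0, 10 ; fmv.h.x fa5, a0" is 2^10 = 1024.0, since binary16 has 10 fraction bits and the biased exponent of 2^10 is 15 + 10 = 25. A standalone sketch, not part of the patch, decoding that pattern under the usual IEEE-754 binary16 assumptions:

  // Illustrative only: decode the bit pattern 25 << 10 without a native
  // half type -- exponent field 25, bias 15, zero mantissa.
  #include <cstdint>
  #include <cstdio>
  #include <cmath>

  int main() {
    uint16_t bits = uint16_t(25) << 10;             // 0x6400
    int exponent = (bits >> 10) & 0x1f;             // 25
    double value = std::ldexp(1.0, exponent - 15);  // 2^(25-15) = 1024.0
    std::printf("0x%04x -> %g\n", bits, value);     // 0x6400 -> 1024
    return 0;
  }

The 16-bit pattern fits a single li/slli/fmv.h.x on either XLEN, which is presumably why the f16 checks change for both targets while the f64 cases above keep the constant-pool load on RV32, where the 64-bit pattern cannot be moved with one fmv.d.x.)
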
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
index 504982111d055b5..a79fb30ddeff40e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
@@ -1,22 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH-RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64
 
 declare <2 x half> @llvm.vp.round.v2f16(<2 x half>, <2 x i1>, i32)
 
 define <2 x half> @vp_round_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_v2f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
@@ -55,10 +56,11 @@ define <2 x half> @vp_round_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl)
 define <2 x half> @vp_round_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_v2f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +99,11 @@ declare <4 x half> @llvm.vp.round.v4f16(<4 x half>, <4 x i1>, i32)
 define <4 x half> @vp_round_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_v4f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
@@ -139,10 +142,11 @@ define <4 x half> @vp_round_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl)
 define <4 x half> @vp_round_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_v4f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -181,10 +185,11 @@ declare <8 x half> @llvm.vp.round.v8f16(<8 x half>, <8 x i1>, i32)
 define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_v8f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
@@ -225,10 +230,11 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
 define <8 x half> @vp_round_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_v8f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -268,10 +274,11 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
 ; ZVFH-LABEL: vp_round_v16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
@@ -313,10 +320,11 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
 define <16 x half> @vp_round_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_v16f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v10, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -533,41 +541,141 @@ define <16 x float> @vp_round_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl
 declare <2 x double> @llvm.vp.round.v2f64(<2 x double>, <2 x i1>, i32)
 
 define <2 x double> @vp_round_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI16_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_round_v2f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI16_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_round_v2f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_round_v2f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI16_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_round_v2f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <2 x double> @llvm.vp.round.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
   ret <2 x double> %v
 }
 
 define <2 x double> @vp_round_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_v2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI17_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_round_v2f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI17_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v9, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_round_v2f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v9, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_round_v2f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI17_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_round_v2f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x double> @llvm.vp.round.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
@@ -577,43 +685,149 @@ define <2 x double> @vp_round_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl)
 declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32)
 
 define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_round_v4f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    vmv1r.v v10, v0
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI18_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v10
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_round_v4f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vmv1r.v v10, v0
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v10
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_round_v4f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI18_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_round_v4f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
 }
 
 define <4 x double> @vp_round_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_v4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI19_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_round_v4f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI19_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v10, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_round_v4f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v10, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_round_v4f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI19_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v10, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_round_v4f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v10, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
@@ -623,43 +837,149 @@ define <4 x double> @vp_round_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl)
 declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32)
 
 define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_round_v8f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    vmv1r.v v12, v0
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI20_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v12
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_round_v8f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vmv1r.v v12, v0
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v12
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_round_v8f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v12, v0
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI20_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_round_v8f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v12, v0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
 }
 
 define <8 x double> @vp_round_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_v8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI21_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_round_v8f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI21_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v12, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_round_v8f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v12, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_round_v8f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI21_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v12, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_round_v8f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v12, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
@@ -669,43 +989,149 @@ define <8 x double> @vp_round_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl)
 declare <15 x double> @llvm.vp.round.v15f64(<15 x double>, <15 x i1>, i32)
 
 define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_v15f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_round_v15f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    vmv1r.v v16, v0
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v16
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_round_v15f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vmv1r.v v16, v0
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v16
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_round_v15f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v16, v0
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_round_v15f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v16, v0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
 }
 
 define <15 x double> @vp_round_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_v15f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_round_v15f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI23_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v16, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_round_v15f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v16, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_round_v15f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI23_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v16, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_round_v15f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v16, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <15 x i1> poison, i1 true, i32 0
   %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
   %v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
@@ -715,43 +1141,149 @@ define <15 x double> @vp_round_v15f64_unmasked(<15 x double> %va, i32 zeroext %e
 declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32)
 
 define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_v16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_round_v16f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    vmv1r.v v16, v0
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI24_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v16
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_round_v16f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vmv1r.v v16, v0
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v16
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_round_v16f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v16, v0
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI24_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_round_v16f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v16, v0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
 }
 
 define <16 x double> @vp_round_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_v16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_round_v16f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI25_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v16, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_round_v16f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v16, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_round_v16f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI25_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v16, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_round_v16f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v16, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
@@ -761,112 +1293,425 @@ define <16 x double> @vp_round_v16f64_unmasked(<16 x double> %va, i32 zeroext %e
 declare <32 x double> @llvm.vp.round.v32f64(<32 x double>, <32 x i1>, i32)
 
 define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_v32f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    vslidedown.vi v24, v0, 2
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB26_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a1, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v8, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_round_v32f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    addi sp, sp, -16
+; ZVFH-RV32-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-RV32-NEXT:    csrr a1, vlenb
+; ZVFH-RV32-NEXT:    slli a1, a1, 4
+; ZVFH-RV32-NEXT:    sub sp, sp, a1
+; ZVFH-RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFH-RV32-NEXT:    vmv1r.v v25, v0
+; ZVFH-RV32-NEXT:    csrr a1, vlenb
+; ZVFH-RV32-NEXT:    slli a1, a1, 3
+; ZVFH-RV32-NEXT:    add a1, sp, a1
+; ZVFH-RV32-NEXT:    addi a1, a1, 16
+; ZVFH-RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFH-RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; ZVFH-RV32-NEXT:    li a2, 16
+; ZVFH-RV32-NEXT:    vslidedown.vi v24, v0, 2
+; ZVFH-RV32-NEXT:    mv a1, a0
+; ZVFH-RV32-NEXT:    bltu a0, a2, .LBB26_2
+; ZVFH-RV32-NEXT:  # %bb.1:
+; ZVFH-RV32-NEXT:    li a1, 16
+; ZVFH-RV32-NEXT:  .LBB26_2:
+; ZVFH-RV32-NEXT:    lui a2, %hi(.LCPI26_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
+; ZVFH-RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v25
+; ZVFH-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a1, 4
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v25
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a1
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFH-RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVFH-RV32-NEXT:    addi a1, a0, -16
+; ZVFH-RV32-NEXT:    sltu a0, a0, a1
+; ZVFH-RV32-NEXT:    addi a0, a0, -1
+; ZVFH-RV32-NEXT:    and a0, a0, a1
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v24
+; ZVFH-RV32-NEXT:    csrr a0, vlenb
+; ZVFH-RV32-NEXT:    slli a0, a0, 3
+; ZVFH-RV32-NEXT:    add a0, sp, a0
+; ZVFH-RV32-NEXT:    addi a0, a0, 16
+; ZVFH-RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-RV32-NEXT:    vfabs.v v8, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v24, v8, fa5, v0.t
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v24
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFH-RV32-NEXT:    addi a0, sp, 16
+; ZVFH-RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFH-RV32-NEXT:    csrr a0, vlenb
+; ZVFH-RV32-NEXT:    slli a0, a0, 4
+; ZVFH-RV32-NEXT:    add sp, sp, a0
+; ZVFH-RV32-NEXT:    addi sp, sp, 16
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_round_v32f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    addi sp, sp, -16
+; ZVFH-RV64-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-RV64-NEXT:    csrr a1, vlenb
+; ZVFH-RV64-NEXT:    slli a1, a1, 4
+; ZVFH-RV64-NEXT:    sub sp, sp, a1
+; ZVFH-RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFH-RV64-NEXT:    vmv1r.v v25, v0
+; ZVFH-RV64-NEXT:    csrr a1, vlenb
+; ZVFH-RV64-NEXT:    slli a1, a1, 3
+; ZVFH-RV64-NEXT:    add a1, sp, a1
+; ZVFH-RV64-NEXT:    addi a1, a1, 16
+; ZVFH-RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFH-RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; ZVFH-RV64-NEXT:    li a2, 16
+; ZVFH-RV64-NEXT:    vslidedown.vi v24, v0, 2
+; ZVFH-RV64-NEXT:    mv a1, a0
+; ZVFH-RV64-NEXT:    bltu a0, a2, .LBB26_2
+; ZVFH-RV64-NEXT:  # %bb.1:
+; ZVFH-RV64-NEXT:    li a1, 16
+; ZVFH-RV64-NEXT:  .LBB26_2:
+; ZVFH-RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v25
+; ZVFH-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    li a1, 1075
+; ZVFH-RV64-NEXT:    slli a1, a1, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a1, 4
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v25
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a1
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFH-RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVFH-RV64-NEXT:    addi a1, a0, -16
+; ZVFH-RV64-NEXT:    sltu a0, a0, a1
+; ZVFH-RV64-NEXT:    addi a0, a0, -1
+; ZVFH-RV64-NEXT:    and a0, a0, a1
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v24
+; ZVFH-RV64-NEXT:    csrr a0, vlenb
+; ZVFH-RV64-NEXT:    slli a0, a0, 3
+; ZVFH-RV64-NEXT:    add a0, sp, a0
+; ZVFH-RV64-NEXT:    addi a0, a0, 16
+; ZVFH-RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-RV64-NEXT:    vfabs.v v8, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v24, v8, fa5, v0.t
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v24
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFH-RV64-NEXT:    addi a0, sp, 16
+; ZVFH-RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFH-RV64-NEXT:    csrr a0, vlenb
+; ZVFH-RV64-NEXT:    slli a0, a0, 4
+; ZVFH-RV64-NEXT:    add sp, sp, a0
+; ZVFH-RV64-NEXT:    addi sp, sp, 16
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_round_v32f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    addi sp, sp, -16
+; ZVFHMIN-RV32-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-RV32-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV32-NEXT:    slli a1, a1, 4
+; ZVFHMIN-RV32-NEXT:    sub sp, sp, a1
+; ZVFHMIN-RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v25, v0
+; ZVFHMIN-RV32-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV32-NEXT:    slli a1, a1, 3
+; ZVFHMIN-RV32-NEXT:    add a1, sp, a1
+; ZVFHMIN-RV32-NEXT:    addi a1, a1, 16
+; ZVFHMIN-RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    li a2, 16
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v24, v0, 2
+; ZVFHMIN-RV32-NEXT:    mv a1, a0
+; ZVFHMIN-RV32-NEXT:    bltu a0, a2, .LBB26_2
+; ZVFHMIN-RV32-NEXT:  # %bb.1:
+; ZVFHMIN-RV32-NEXT:    li a1, 16
+; ZVFHMIN-RV32-NEXT:  .LBB26_2:
+; ZVFHMIN-RV32-NEXT:    lui a2, %hi(.LCPI26_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v25
+; ZVFHMIN-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a1, 4
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v25
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a1
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, -16
+; ZVFHMIN-RV32-NEXT:    sltu a0, a0, a1
+; ZVFHMIN-RV32-NEXT:    addi a0, a0, -1
+; ZVFHMIN-RV32-NEXT:    and a0, a0, a1
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v24
+; ZVFHMIN-RV32-NEXT:    csrr a0, vlenb
+; ZVFHMIN-RV32-NEXT:    slli a0, a0, 3
+; ZVFHMIN-RV32-NEXT:    add a0, sp, a0
+; ZVFHMIN-RV32-NEXT:    addi a0, a0, 16
+; ZVFHMIN-RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-RV32-NEXT:    vfabs.v v8, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v24, v8, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v24
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    addi a0, sp, 16
+; ZVFHMIN-RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-RV32-NEXT:    csrr a0, vlenb
+; ZVFHMIN-RV32-NEXT:    slli a0, a0, 4
+; ZVFHMIN-RV32-NEXT:    add sp, sp, a0
+; ZVFHMIN-RV32-NEXT:    addi sp, sp, 16
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_round_v32f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    addi sp, sp, -16
+; ZVFHMIN-RV64-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-RV64-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV64-NEXT:    slli a1, a1, 4
+; ZVFHMIN-RV64-NEXT:    sub sp, sp, a1
+; ZVFHMIN-RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v25, v0
+; ZVFHMIN-RV64-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV64-NEXT:    slli a1, a1, 3
+; ZVFHMIN-RV64-NEXT:    add a1, sp, a1
+; ZVFHMIN-RV64-NEXT:    addi a1, a1, 16
+; ZVFHMIN-RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; ZVFHMIN-RV64-NEXT:    li a2, 16
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v24, v0, 2
+; ZVFHMIN-RV64-NEXT:    mv a1, a0
+; ZVFHMIN-RV64-NEXT:    bltu a0, a2, .LBB26_2
+; ZVFHMIN-RV64-NEXT:  # %bb.1:
+; ZVFHMIN-RV64-NEXT:    li a1, 16
+; ZVFHMIN-RV64-NEXT:  .LBB26_2:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v25
+; ZVFHMIN-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a1, 1075
+; ZVFHMIN-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a1, 4
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v25
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a1
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-RV64-NEXT:    addi a1, a0, -16
+; ZVFHMIN-RV64-NEXT:    sltu a0, a0, a1
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, -1
+; ZVFHMIN-RV64-NEXT:    and a0, a0, a1
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v24
+; ZVFHMIN-RV64-NEXT:    csrr a0, vlenb
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 3
+; ZVFHMIN-RV64-NEXT:    add a0, sp, a0
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 16
+; ZVFHMIN-RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-RV64-NEXT:    vfabs.v v8, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v24, v8, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v24
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    addi a0, sp, 16
+; ZVFHMIN-RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-RV64-NEXT:    csrr a0, vlenb
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 4
+; ZVFHMIN-RV64-NEXT:    add sp, sp, a0
+; ZVFHMIN-RV64-NEXT:    addi sp, sp, 16
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <32 x double> @llvm.vp.round.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
   ret <32 x double> %v
 }
 
 define <32 x double> @vp_round_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_v32f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB27_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB27_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a1, 4
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_round_v32f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    li a2, 16
+; ZVFH-RV32-NEXT:    mv a1, a0
+; ZVFH-RV32-NEXT:    bltu a0, a2, .LBB27_2
+; ZVFH-RV32-NEXT:  # %bb.1:
+; ZVFH-RV32-NEXT:    li a1, 16
+; ZVFH-RV32-NEXT:  .LBB27_2:
+; ZVFH-RV32-NEXT:    lui a2, %hi(.LCPI27_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
+; ZVFH-RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v24, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFH-RV32-NEXT:    fsrmi a1, 4
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    fsrm a1
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV32-NEXT:    addi a1, a0, -16
+; ZVFH-RV32-NEXT:    sltu a0, a0, a1
+; ZVFH-RV32-NEXT:    addi a0, a0, -1
+; ZVFH-RV32-NEXT:    and a0, a0, a1
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v24, v16
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFH-RV32-NEXT:    fsrmi a0, 4
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFH-RV32-NEXT:    fsrm a0
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_round_v32f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    li a2, 16
+; ZVFH-RV64-NEXT:    mv a1, a0
+; ZVFH-RV64-NEXT:    bltu a0, a2, .LBB27_2
+; ZVFH-RV64-NEXT:  # %bb.1:
+; ZVFH-RV64-NEXT:    li a1, 16
+; ZVFH-RV64-NEXT:  .LBB27_2:
+; ZVFH-RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v24, v8
+; ZVFH-RV64-NEXT:    li a1, 1075
+; ZVFH-RV64-NEXT:    slli a1, a1, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFH-RV64-NEXT:    fsrmi a1, 4
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    fsrm a1
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV64-NEXT:    addi a1, a0, -16
+; ZVFH-RV64-NEXT:    sltu a0, a0, a1
+; ZVFH-RV64-NEXT:    addi a0, a0, -1
+; ZVFH-RV64-NEXT:    and a0, a0, a1
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v24, v16
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFH-RV64-NEXT:    fsrmi a0, 4
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFH-RV64-NEXT:    fsrm a0
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_round_v32f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    li a2, 16
+; ZVFHMIN-RV32-NEXT:    mv a1, a0
+; ZVFHMIN-RV32-NEXT:    bltu a0, a2, .LBB27_2
+; ZVFHMIN-RV32-NEXT:  # %bb.1:
+; ZVFHMIN-RV32-NEXT:    li a1, 16
+; ZVFHMIN-RV32-NEXT:  .LBB27_2:
+; ZVFHMIN-RV32-NEXT:    lui a2, %hi(.LCPI27_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v24, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a1, 4
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a1
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, -16
+; ZVFHMIN-RV32-NEXT:    sltu a0, a0, a1
+; ZVFHMIN-RV32-NEXT:    addi a0, a0, -1
+; ZVFHMIN-RV32-NEXT:    and a0, a0, a1
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v24, v16
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a0
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_round_v32f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    li a2, 16
+; ZVFHMIN-RV64-NEXT:    mv a1, a0
+; ZVFHMIN-RV64-NEXT:    bltu a0, a2, .LBB27_2
+; ZVFHMIN-RV64-NEXT:  # %bb.1:
+; ZVFHMIN-RV64-NEXT:    li a1, 16
+; ZVFHMIN-RV64-NEXT:  .LBB27_2:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v24, v8
+; ZVFHMIN-RV64-NEXT:    li a1, 1075
+; ZVFHMIN-RV64-NEXT:    slli a1, a1, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a1
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a1, 4
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a1
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    addi a1, a0, -16
+; ZVFHMIN-RV64-NEXT:    sltu a0, a0, a1
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, -1
+; ZVFHMIN-RV64-NEXT:    and a0, a0, a1
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v24, v16
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a0, 4
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a0
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <32 x i1> poison, i1 true, i32 0
   %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
   %v = call <32 x double> @llvm.vp.round.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
index 35480164d4a12dd..68555bf246fca3c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
@@ -13,10 +13,11 @@ declare <2 x half> @llvm.vp.roundeven.v2f16(<2 x half>, <2 x i1>, i32)
 define <2 x half> @vp_roundeven_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_v2f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
@@ -55,10 +56,11 @@ define <2 x half> @vp_roundeven_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %
 define <2 x half> @vp_roundeven_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_v2f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +99,11 @@ declare <4 x half> @llvm.vp.roundeven.v4f16(<4 x half>, <4 x i1>, i32)
 define <4 x half> @vp_roundeven_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_v4f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
@@ -139,10 +142,11 @@ define <4 x half> @vp_roundeven_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %
 define <4 x half> @vp_roundeven_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_v4f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -181,10 +185,11 @@ declare <8 x half> @llvm.vp.roundeven.v8f16(<8 x half>, <8 x i1>, i32)
 define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_v8f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
@@ -225,10 +230,11 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %
 define <8 x half> @vp_roundeven_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_v8f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -268,10 +274,11 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe
 ; ZVFH-LABEL: vp_roundeven_v16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
@@ -313,10 +320,11 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe
 define <16 x half> @vp_roundeven_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_v16f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v10, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -533,41 +541,11 @@ define <16 x float> @vp_roundeven_v16f32_unmasked(<16 x float> %va, i32 zeroext
 declare <2 x double> @llvm.vp.roundeven.v2f64(<2 x double>, <2 x i1>, i32)
 
 define <2 x double> @vp_roundeven_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI16_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
   ret <2 x double> %v
 }
 
 define <2 x double> @vp_roundeven_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_v2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI17_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
@@ -577,43 +555,11 @@ define <2 x double> @vp_roundeven_v2f64_unmasked(<2 x double> %va, i32 zeroext %
 declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32)
 
 define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
 }
 
 define <4 x double> @vp_roundeven_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_v4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI19_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
@@ -623,43 +569,11 @@ define <4 x double> @vp_roundeven_v4f64_unmasked(<4 x double> %va, i32 zeroext %
 declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32)
 
 define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
 }
 
 define <8 x double> @vp_roundeven_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_v8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI21_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
@@ -669,43 +583,11 @@ define <8 x double> @vp_roundeven_v8f64_unmasked(<8 x double> %va, i32 zeroext %
 declare <15 x double> @llvm.vp.roundeven.v15f64(<15 x double>, <15 x i1>, i32)
 
 define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_v15f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
 }
 
 define <15 x double> @vp_roundeven_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_v15f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <15 x i1> poison, i1 true, i32 0
   %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
   %v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
@@ -715,43 +597,11 @@ define <15 x double> @vp_roundeven_v15f64_unmasked(<15 x double> %va, i32 zeroex
 declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32)
 
 define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_v16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
 }
 
 define <16 x double> @vp_roundeven_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_v16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
@@ -761,112 +611,11 @@ define <16 x double> @vp_roundeven_v16f64_unmasked(<16 x double> %va, i32 zeroex
 declare <32 x double> @llvm.vp.roundeven.v32f64(<32 x double>, <32 x i1>, i32)
 
 define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_v32f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    vslidedown.vi v24, v0, 2
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB26_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a1, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v8, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
   %v = call <32 x double> @llvm.vp.roundeven.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
   ret <32 x double> %v
 }
 
 define <32 x double> @vp_roundeven_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_v32f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB27_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB27_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a1, 0
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <32 x i1> poison, i1 true, i32 0
   %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
   %v = call <32 x double> @llvm.vp.roundeven.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
index 4928eba52ac8ca3..896ce08a2d31b48 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
@@ -13,10 +13,11 @@ declare <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half>, <2 x i1>, i32)
 define <2 x half> @vp_roundtozero_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_v2f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
@@ -55,10 +56,11 @@ define <2 x half> @vp_roundtozero_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext
 define <2 x half> @vp_roundtozero_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_v2f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 1
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +99,11 @@ declare <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half>, <4 x i1>, i32)
 define <4 x half> @vp_roundtozero_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_v4f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
@@ -139,10 +142,11 @@ define <4 x half> @vp_roundtozero_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext
 define <4 x half> @vp_roundtozero_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_v4f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 1
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -181,10 +185,11 @@ declare <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half>, <8 x i1>, i32)
 define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_v8f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
@@ -225,10 +230,11 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext
 define <8 x half> @vp_roundtozero_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_v8f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 1
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -268,10 +274,11 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer
 ; ZVFH-LABEL: vp_roundtozero_v16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
@@ -313,10 +320,11 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer
 define <16 x half> @vp_roundtozero_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_v16f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v10, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
 ; ZVFH-NEXT:    fsrmi a0, 1
 ; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -533,41 +541,11 @@ define <16 x float> @vp_roundtozero_v16f32_unmasked(<16 x float> %va, i32 zeroex
 declare <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double>, <2 x i1>, i32)
 
 define <2 x double> @vp_roundtozero_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI16_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
   ret <2 x double> %v
 }
 
 define <2 x double> @vp_roundtozero_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_v2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI17_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
@@ -577,43 +555,11 @@ define <2 x double> @vp_roundtozero_v2f64_unmasked(<2 x double> %va, i32 zeroext
 declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32)
 
 define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
 }
 
 define <4 x double> @vp_roundtozero_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_v4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI19_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
@@ -623,43 +569,11 @@ define <4 x double> @vp_roundtozero_v4f64_unmasked(<4 x double> %va, i32 zeroext
 declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32)
 
 define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
 }
 
 define <8 x double> @vp_roundtozero_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_v8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI21_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
@@ -669,43 +583,11 @@ define <8 x double> @vp_roundtozero_v8f64_unmasked(<8 x double> %va, i32 zeroext
 declare <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double>, <15 x i1>, i32)
 
 define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_v15f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
 }
 
 define <15 x double> @vp_roundtozero_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_v15f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <15 x i1> poison, i1 true, i32 0
   %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
   %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
@@ -715,43 +597,11 @@ define <15 x double> @vp_roundtozero_v15f64_unmasked(<15 x double> %va, i32 zero
 declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32)
 
 define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_v16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
 }
 
 define <16 x double> @vp_roundtozero_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_v16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
@@ -761,112 +611,11 @@ define <16 x double> @vp_roundtozero_v16f64_unmasked(<16 x double> %va, i32 zero
 declare <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double>, <32 x i1>, i32)
 
 define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_v32f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    vslidedown.vi v24, v0, 2
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB26_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a1, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v8, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
   %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
   ret <32 x double> %v
 }
 
 define <32 x double> @vp_roundtozero_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_v32f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    bltu a0, a2, .LBB27_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:  .LBB27_2:
-; CHECK-NEXT:    lui a2, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a1, 1
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    addi a1, a0, -16
-; CHECK-NEXT:    sltu a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <32 x i1> poison, i1 true, i32 0
   %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
   %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
index 6c4f523aa8d9480..9ba9234b30315e4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
@@ -1,18 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x half> @llvm.vp.floor.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x half> @vp_floor_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv1f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
@@ -30,10 +31,11 @@ define <vscale x 1 x half> @vp_floor_nxv1f16(<vscale x 1 x half> %va, <vscale x
 define <vscale x 1 x half> @vp_floor_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv1f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -53,10 +55,11 @@ declare <vscale x 2 x half> @llvm.vp.floor.nxv2f16(<vscale x 2 x half>, <vscale
 define <vscale x 2 x half> @vp_floor_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
@@ -74,10 +77,11 @@ define <vscale x 2 x half> @vp_floor_nxv2f16(<vscale x 2 x half> %va, <vscale x
 define <vscale x 2 x half> @vp_floor_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv2f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +101,11 @@ declare <vscale x 4 x half> @llvm.vp.floor.nxv4f16(<vscale x 4 x half>, <vscale
 define <vscale x 4 x half> @vp_floor_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
@@ -118,10 +123,11 @@ define <vscale x 4 x half> @vp_floor_nxv4f16(<vscale x 4 x half> %va, <vscale x
 define <vscale x 4 x half> @vp_floor_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv4f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -142,10 +148,11 @@ define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: vp_floor_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI6_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
@@ -164,10 +171,11 @@ define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x
 define <vscale x 8 x half> @vp_floor_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv8f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI7_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -188,10 +196,11 @@ define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; CHECK-LABEL: vp_floor_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI8_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
@@ -210,10 +219,11 @@ define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale
 define <vscale x 16 x half> @vp_floor_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv16f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI9_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI9_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -234,10 +244,11 @@ define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; CHECK-LABEL: vp_floor_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI10_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
@@ -256,10 +267,11 @@ define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale
 define <vscale x 32 x half> @vp_floor_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv32f16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI11_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI11_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    fsrmi a0, 2
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -503,41 +515,75 @@ define <vscale x 16 x float> @vp_floor_nxv16f32_unmasked(<vscale x 16 x float> %
 declare <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x double> @vp_floor_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_floor_nxv1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI22_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vfabs.v v9, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_floor_nxv1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vfabs.v v9, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x double> %v
 }
 
 define <vscale x 1 x double> @vp_floor_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_nxv1f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_floor_nxv1f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI23_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_floor_nxv1f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -547,43 +593,79 @@ define <vscale x 1 x double> @vp_floor_nxv1f64_unmasked(<vscale x 1 x double> %v
 declare <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x double> @vp_floor_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_floor_nxv2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v10, v0
+; RV32-NEXT:    lui a1, %hi(.LCPI24_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vfabs.v v12, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_floor_nxv2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v10, v0
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-NEXT:    vfabs.v v12, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
 }
 
 define <vscale x 2 x double> @vp_floor_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_nxv2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_floor_nxv2f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI25_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_floor_nxv2f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -593,43 +675,79 @@ define <vscale x 2 x double> @vp_floor_nxv2f64_unmasked(<vscale x 2 x double> %v
 declare <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x double> @vp_floor_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_floor_nxv4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    lui a1, %hi(.LCPI26_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vfabs.v v16, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_floor_nxv4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV64-NEXT:    vfabs.v v16, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
 }
 
 define <vscale x 4 x double> @vp_floor_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_nxv4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_floor_nxv4f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI27_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI27_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_floor_nxv4f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -639,43 +757,79 @@ define <vscale x 4 x double> @vp_floor_nxv4f64_unmasked(<vscale x 4 x double> %v
 declare <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
 
 define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_nxv7f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_floor_nxv7f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v16, v0
+; RV32-NEXT:    lui a1, %hi(.LCPI28_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v24, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_floor_nxv7f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v16, v0
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v24, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
 }
 
 define <vscale x 7 x double> @vp_floor_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_nxv7f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI29_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_floor_nxv7f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI29_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI29_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v16, v8
+; RV32-NEXT:    vmflt.vf v0, v16, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_floor_nxv7f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v16, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v16, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
@@ -685,43 +839,79 @@ define <vscale x 7 x double> @vp_floor_nxv7f64_unmasked(<vscale x 7 x double> %v
 declare <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_floor_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v16, v0
+; RV32-NEXT:    lui a1, %hi(.LCPI30_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v24, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_floor_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v16, v0
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v24, v8, v0.t
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
 }
 
 define <vscale x 8 x double> @vp_floor_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_nxv8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI31_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_floor_nxv8f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI31_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI31_0)(a1)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v16, v8
+; RV32-NEXT:    vmflt.vf v0, v16, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_floor_nxv8f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v16, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v16, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -732,111 +922,215 @@ define <vscale x 8 x double> @vp_floor_nxv8f64_unmasked(<vscale x 8 x double> %v
 declare <vscale x 16 x double> @llvm.vp.floor.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_nxv16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v24, v0
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    srli a2, a1, 3
-; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfabs.v v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
-; CHECK-NEXT:    fsrmi a2, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    bltu a0, a1, .LBB32_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB32_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_floor_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV32-NEXT:    vmv1r.v v24, v0
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    srli a2, a1, 3
+; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vx v25, v0, a2
+; RV32-NEXT:    sub a2, a0, a1
+; RV32-NEXT:    sltu a3, a0, a2
+; RV32-NEXT:    addi a3, a3, -1
+; RV32-NEXT:    and a2, a3, a2
+; RV32-NEXT:    lui a3, %hi(.LCPI32_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v25
+; RV32-NEXT:    vfabs.v v8, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; RV32-NEXT:    fsrmi a2, 2
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v25
+; RV32-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; RV32-NEXT:    fsrm a2
+; RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    bltu a0, a1, .LBB32_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB32_2:
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v24
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vfabs.v v16, v8, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v24
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_floor_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT:    vmv1r.v v24, v0
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    srli a2, a1, 3
+; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; RV64-NEXT:    vslidedown.vx v25, v0, a2
+; RV64-NEXT:    sub a2, a0, a1
+; RV64-NEXT:    sltu a3, a0, a2
+; RV64-NEXT:    addi a3, a3, -1
+; RV64-NEXT:    and a2, a3, a2
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v25
+; RV64-NEXT:    vfabs.v v8, v16, v0.t
+; RV64-NEXT:    li a2, 1075
+; RV64-NEXT:    slli a2, a2, 52
+; RV64-NEXT:    fmv.d.x fa5, a2
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; RV64-NEXT:    fsrmi a2, 2
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v25
+; RV64-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; RV64-NEXT:    fsrm a2
+; RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    bltu a0, a1, .LBB32_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB32_2:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vfabs.v v16, v8, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
   %v = call <vscale x 16 x double> @llvm.vp.floor.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x double> %v
 }
 
 define <vscale x 16 x double> @vp_floor_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_floor_nxv16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI33_0)(a3)
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a2, 2
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    bltu a0, a1, .LBB33_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB33_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: vp_floor_nxv16f64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    sub a2, a0, a1
+; RV32-NEXT:    lui a3, %hi(.LCPI33_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI33_0)(a3)
+; RV32-NEXT:    sltu a3, a0, a2
+; RV32-NEXT:    addi a3, a3, -1
+; RV32-NEXT:    and a2, a3, a2
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v24, v16
+; RV32-NEXT:    vmflt.vf v0, v24, fa5
+; RV32-NEXT:    fsrmi a2, 2
+; RV32-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; RV32-NEXT:    fsrm a2
+; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; RV32-NEXT:    bltu a0, a1, .LBB33_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB33_2:
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v24, v8
+; RV32-NEXT:    vmflt.vf v0, v24, fa5
+; RV32-NEXT:    fsrmi a0, 2
+; RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_floor_nxv16f64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    sub a2, a0, a1
+; RV64-NEXT:    sltu a3, a0, a2
+; RV64-NEXT:    addi a3, a3, -1
+; RV64-NEXT:    and a2, a3, a2
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v24, v16
+; RV64-NEXT:    li a2, 1075
+; RV64-NEXT:    slli a2, a2, 52
+; RV64-NEXT:    fmv.d.x fa5, a2
+; RV64-NEXT:    vmflt.vf v0, v24, fa5
+; RV64-NEXT:    fsrmi a2, 2
+; RV64-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; RV64-NEXT:    fsrm a2
+; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; RV64-NEXT:    bltu a0, a1, .LBB33_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB33_2:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v24, v8
+; RV64-NEXT:    vmflt.vf v0, v24, fa5
+; RV64-NEXT:    fsrmi a0, 2
+; RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x double> @llvm.vp.floor.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll
index f90237b8d7e95d3..83347167193fba2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll
@@ -11,10 +11,11 @@ define <vscale x 1 x half> @nearbyint_nxv1f16(<vscale x 1 x half> %v) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -34,10 +35,11 @@ define <vscale x 2 x half> @nearbyint_nxv2f16(<vscale x 2 x half> %v) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -57,10 +59,11 @@ define <vscale x 4 x half> @nearbyint_nxv4f16(<vscale x 4 x half> %v) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -80,10 +83,11 @@ define <vscale x 8 x half> @nearbyint_nxv8f16(<vscale x 8 x half> %v) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -103,10 +107,11 @@ define <vscale x 16 x half> @nearbyint_nxv16f16(<vscale x 16 x half> %v) strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -126,10 +131,11 @@ define <vscale x 32 x half> @nearbyint_nxv32f16(<vscale x 32 x half> %v) strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -260,22 +266,6 @@ define <vscale x 16 x float> @nearbyint_nxv16f32(<vscale x 16 x float> %v) stric
 declare <vscale x 1 x double> @llvm.experimental.constrained.nearbyint.nxv1f64(<vscale x 1 x double>, metadata, metadata)
 
 define <vscale x 1 x double> @nearbyint_nxv1f64(<vscale x 1 x double> %v) strictfp {
-; CHECK-LABEL: nearbyint_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %r = call <vscale x 1 x double> @llvm.experimental.constrained.nearbyint.nxv1f64(<vscale x 1 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x double> %r
 }
@@ -283,22 +273,6 @@ define <vscale x 1 x double> @nearbyint_nxv1f64(<vscale x 1 x double> %v) strict
 declare <vscale x 2 x double> @llvm.experimental.constrained.nearbyint.nxv2f64(<vscale x 2 x double>, metadata, metadata)
 
 define <vscale x 2 x double> @nearbyint_nxv2f64(<vscale x 2 x double> %v) strictfp {
-; CHECK-LABEL: nearbyint_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %r = call <vscale x 2 x double> @llvm.experimental.constrained.nearbyint.nxv2f64(<vscale x 2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %r
 }
@@ -306,22 +280,6 @@ define <vscale x 2 x double> @nearbyint_nxv2f64(<vscale x 2 x double> %v) strict
 declare <vscale x 4 x double> @llvm.experimental.constrained.nearbyint.nxv4f64(<vscale x 4 x double>, metadata, metadata)
 
 define <vscale x 4 x double> @nearbyint_nxv4f64(<vscale x 4 x double> %v) strictfp {
-; CHECK-LABEL: nearbyint_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %r = call <vscale x 4 x double> @llvm.experimental.constrained.nearbyint.nxv4f64(<vscale x 4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %r
 }
@@ -329,22 +287,6 @@ define <vscale x 4 x double> @nearbyint_nxv4f64(<vscale x 4 x double> %v) strict
 declare <vscale x 8 x double> @llvm.experimental.constrained.nearbyint.nxv8f64(<vscale x 8 x double>, metadata, metadata)
 
 define <vscale x 8 x double> @nearbyint_nxv8f64(<vscale x 8 x double> %v) strictfp {
-; CHECK-LABEL: nearbyint_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %r = call <vscale x 8 x double> @llvm.experimental.constrained.nearbyint.nxv8f64(<vscale x 8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %r
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll
index 9aa356b9b65e0b9..cf5f8666e2aaecc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll
@@ -7,10 +7,11 @@
 define <vscale x 1 x half> @nearbyint_nxv1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: nearbyint_nxv1f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -27,10 +28,11 @@ declare <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half>)
 define <vscale x 2 x half> @nearbyint_nxv2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: nearbyint_nxv2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -47,10 +49,11 @@ declare <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half>)
 define <vscale x 4 x half> @nearbyint_nxv4f16(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: nearbyint_nxv4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -67,10 +70,11 @@ declare <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half>)
 define <vscale x 8 x half> @nearbyint_nxv8f16(<vscale x 8 x half> %x) {
 ; CHECK-LABEL: nearbyint_nxv8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -87,10 +91,11 @@ declare <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half>)
 define <vscale x 16 x half> @nearbyint_nxv16f16(<vscale x 16 x half> %x) {
 ; CHECK-LABEL: nearbyint_nxv16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -107,10 +112,11 @@ declare <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half>)
 define <vscale x 32 x half> @nearbyint_nxv32f16(<vscale x 32 x half> %x) {
 ; CHECK-LABEL: nearbyint_nxv32f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -225,80 +231,24 @@ define <vscale x 16 x float> @nearbyint_nxv16f32(<vscale x 16 x float> %x) {
 declare <vscale x 16 x float> @llvm.nearbyint.nxv16f32(<vscale x 16 x float>)
 
 define <vscale x 1 x double> @nearbyint_nxv1f64(<vscale x 1 x double> %x) {
-; CHECK-LABEL: nearbyint_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.nearbyint.nxv1f64(<vscale x 1 x double> %x)
   ret <vscale x 1 x double> %a
 }
 declare <vscale x 1 x double> @llvm.nearbyint.nxv1f64(<vscale x 1 x double>)
 
 define <vscale x 2 x double> @nearbyint_nxv2f64(<vscale x 2 x double> %x) {
-; CHECK-LABEL: nearbyint_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> %x)
   ret <vscale x 2 x double> %a
 }
 declare <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double>)
 
 define <vscale x 4 x double> @nearbyint_nxv4f64(<vscale x 4 x double> %x) {
-; CHECK-LABEL: nearbyint_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double> %x)
   ret <vscale x 4 x double> %a
 }
 declare <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double>)
 
 define <vscale x 8 x double> @nearbyint_nxv8f64(<vscale x 8 x double> %x) {
-; CHECK-LABEL: nearbyint_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 8 x double> @llvm.nearbyint.nxv8f64(<vscale x 8 x double> %x)
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
index 8f36aad8172744f..322bc42a4371dd6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
@@ -152,47 +152,11 @@ define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
 }
 
 define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
-; CHECK-LABEL: test_signed_v4f64_v4i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI10_0)(a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_1)
-; CHECK-NEXT:    fld fa4, %lo(.LCPI10_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfmax.vf v12, v8, fa5
-; CHECK-NEXT:    vfmin.vf v12, v12, fa4
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v12
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v16, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmerge.vim v8, v12, 0, v0
-; CHECK-NEXT:    ret
     %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
     ret <vscale x 4 x i16> %x
 }
 
 define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
-; CHECK-LABEL: test_signed_v8f64_v8i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_1)
-; CHECK-NEXT:    fld fa4, %lo(.LCPI11_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfmax.vf v16, v8, fa5
-; CHECK-NEXT:    vfmin.vf v16, v16, fa4
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v24, v16
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v24, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmerge.vim v8, v16, 0, v0
-; CHECK-NEXT:    ret
     %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
     ret <vscale x 8 x i16> %x
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll
index 242034f9826cb6e..ae6d60b495c4cd1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll
@@ -168,11 +168,13 @@ define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
 ;
 ; CHECK64-LABEL: test_signed_v4f64_v4i16:
 ; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    lui a0, %hi(.LCPI10_0)
-; CHECK64-NEXT:    fld fa5, %lo(.LCPI10_0)(a0)
-; CHECK64-NEXT:    fmv.d.x fa4, zero
+; CHECK64-NEXT:    fmv.d.x fa5, zero
 ; CHECK64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK64-NEXT:    vfmax.vf v8, v8, fa4
+; CHECK64-NEXT:    vfmax.vf v8, v8, fa5
+; CHECK64-NEXT:    lui a0, 8312
+; CHECK64-NEXT:    addi a0, a0, -1
+; CHECK64-NEXT:    slli a0, a0, 37
+; CHECK64-NEXT:    fmv.d.x fa5, a0
 ; CHECK64-NEXT:    vfmin.vf v8, v8, fa5
 ; CHECK64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK64-NEXT:    vfncvt.rtz.xu.f.w v12, v8
@@ -200,11 +202,13 @@ define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
 ;
 ; CHECK64-LABEL: test_signed_v8f64_v8i16:
 ; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK64-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK64-NEXT:    fmv.d.x fa4, zero
+; CHECK64-NEXT:    fmv.d.x fa5, zero
 ; CHECK64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK64-NEXT:    vfmax.vf v8, v8, fa4
+; CHECK64-NEXT:    vfmax.vf v8, v8, fa5
+; CHECK64-NEXT:    lui a0, 8312
+; CHECK64-NEXT:    addi a0, a0, -1
+; CHECK64-NEXT:    slli a0, a0, 37
+; CHECK64-NEXT:    fmv.d.x fa5, a0
 ; CHECK64-NEXT:    vfmin.vf v8, v8, fa5
 ; CHECK64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK64-NEXT:    vfncvt.rtz.xu.f.w v16, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll
index fb77b746549400a..44ea88dd316e02f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll
@@ -1,16 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x half> @rint_nxv1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: rint_nxv1f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -25,10 +26,11 @@ declare <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half>)
 define <vscale x 2 x half> @rint_nxv2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: rint_nxv2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -43,10 +45,11 @@ declare <vscale x 2 x half> @llvm.rint.nxv2f16(<vscale x 2 x half>)
 define <vscale x 4 x half> @rint_nxv4f16(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: rint_nxv4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -61,10 +64,11 @@ declare <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half>)
 define <vscale x 8 x half> @rint_nxv8f16(<vscale x 8 x half> %x) {
 ; CHECK-LABEL: rint_nxv8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
@@ -79,10 +83,11 @@ declare <vscale x 8 x half> @llvm.rint.nxv8f16(<vscale x 8 x half>)
 define <vscale x 16 x half> @rint_nxv16f16(<vscale x 16 x half> %x) {
 ; CHECK-LABEL: rint_nxv16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -97,10 +102,11 @@ declare <vscale x 16 x half> @llvm.rint.nxv16f16(<vscale x 16 x half>)
 define <vscale x 32 x half> @rint_nxv32f16(<vscale x 32 x half> %x) {
 ; CHECK-LABEL: rint_nxv32f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
@@ -203,72 +209,128 @@ define <vscale x 16 x float> @rint_nxv16f32(<vscale x 16 x float> %x) {
 declare <vscale x 16 x float> @llvm.rint.nxv16f32(<vscale x 16 x float>)
 
 define <vscale x 1 x double> @rint_nxv1f64(<vscale x 1 x double> %x) {
-; CHECK-LABEL: rint_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: rint_nxv1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: rint_nxv1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double> %x)
   ret <vscale x 1 x double> %a
 }
 declare <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double>)
 
 define <vscale x 2 x double> @rint_nxv2f64(<vscale x 2 x double> %x) {
-; CHECK-LABEL: rint_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: rint_nxv2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: rint_nxv2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> %x)
   ret <vscale x 2 x double> %a
 }
 declare <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double>)
 
 define <vscale x 4 x double> @rint_nxv4f64(<vscale x 4 x double> %x) {
-; CHECK-LABEL: rint_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: rint_nxv4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: rint_nxv4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.rint.nxv4f64(<vscale x 4 x double> %x)
   ret <vscale x 4 x double> %a
 }
 declare <vscale x 4 x double> @llvm.rint.nxv4f64(<vscale x 4 x double>)
 
 define <vscale x 8 x double> @rint_nxv8f64(<vscale x 8 x double> %x) {
-; CHECK-LABEL: rint_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: rint_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v16, v8
+; RV32-NEXT:    vmflt.vf v0, v16, fa5
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: rint_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v16, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v16, fa5
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 8 x double> @llvm.rint.nxv8f64(<vscale x 8 x double> %x)
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll
index 3276f481f30ea5e..d42278b823f492c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; This file tests the code generation for `llvm.experimental.constrained.round.*` on scalable vector type.
 
@@ -11,10 +11,11 @@ define <vscale x 1 x half> @round_nxv1f16(<vscale x 1 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -33,10 +34,11 @@ define <vscale x 2 x half> @round_nxv2f16(<vscale x 2 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -55,10 +57,11 @@ define <vscale x 4 x half> @round_nxv4f16(<vscale x 4 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -77,10 +80,11 @@ define <vscale x 8 x half> @round_nxv8f16(<vscale x 8 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -99,10 +103,11 @@ define <vscale x 16 x half> @round_nxv16f16(<vscale x 16 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -121,10 +126,11 @@ define <vscale x 32 x half> @round_nxv32f16(<vscale x 32 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -249,88 +255,160 @@ define <vscale x 16 x float> @round_nxv16f32(<vscale x 16 x float> %x) strictfp
 declare <vscale x 16 x float> @llvm.experimental.constrained.round.nxv16f32(<vscale x 16 x float>, metadata)
 
 define <vscale x 1 x double> @round_nxv1f64(<vscale x 1 x double> %x) strictfp {
-; CHECK-LABEL: round_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: round_nxv1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 4
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: round_nxv1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 4
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.experimental.constrained.round.nxv1f64(<vscale x 1 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 1 x double> %a
 }
 declare <vscale x 1 x double> @llvm.experimental.constrained.round.nxv1f64(<vscale x 1 x double>, metadata)
 
 define <vscale x 2 x double> @round_nxv2f64(<vscale x 2 x double> %x) strictfp {
-; CHECK-LABEL: round_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: round_nxv2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    fsrmi a0, 4
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: round_nxv2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    fsrmi a0, 4
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 2 x double> @llvm.experimental.constrained.round.nxv2f64(<vscale x 2 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %a
 }
 declare <vscale x 2 x double> @llvm.experimental.constrained.round.nxv2f64(<vscale x 2 x double>, metadata)
 
 define <vscale x 4 x double> @round_nxv4f64(<vscale x 4 x double> %x) strictfp {
-; CHECK-LABEL: round_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: round_nxv4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    fsrmi a0, 4
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: round_nxv4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    fsrmi a0, 4
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.experimental.constrained.round.nxv4f64(<vscale x 4 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %a
 }
 declare <vscale x 4 x double> @llvm.experimental.constrained.round.nxv4f64(<vscale x 4 x double>, metadata)
 
 define <vscale x 8 x double> @round_nxv8f64(<vscale x 8 x double> %x) strictfp {
-; CHECK-LABEL: round_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: round_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmfne.vv v0, v8, v8
+; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV32-NEXT:    vfabs.v v16, v8
+; RV32-NEXT:    vmflt.vf v0, v16, fa5
+; RV32-NEXT:    fsrmi a0, 4
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: round_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmfne.vv v0, v8, v8
+; RV64-NEXT:    vfadd.vv v8, v8, v8, v0.t
+; RV64-NEXT:    vfabs.v v16, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v16, fa5
+; RV64-NEXT:    fsrmi a0, 4
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 8 x double> @llvm.experimental.constrained.round.nxv8f64(<vscale x 8 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll
index bb6724eeb320067..74df5590851fd50 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll
@@ -1,18 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; This file tests the code generation for `llvm.round.*` on scalable vector types.
 
 define <vscale x 1 x half> @round_nxv1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: round_nxv1f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -29,10 +30,11 @@ declare <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half>)
 define <vscale x 2 x half> @round_nxv2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: round_nxv2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -49,10 +51,11 @@ declare <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half>)
 define <vscale x 4 x half> @round_nxv4f16(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: round_nxv4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -69,10 +72,11 @@ declare <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half>)
 define <vscale x 8 x half> @round_nxv8f16(<vscale x 8 x half> %x) {
 ; CHECK-LABEL: round_nxv8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -89,10 +93,11 @@ declare <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half>)
 define <vscale x 16 x half> @round_nxv16f16(<vscale x 16 x half> %x) {
 ; CHECK-LABEL: round_nxv16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -109,10 +114,11 @@ declare <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half>)
 define <vscale x 32 x half> @round_nxv32f16(<vscale x 32 x half> %x) {
 ; CHECK-LABEL: round_nxv32f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -227,80 +233,144 @@ define <vscale x 16 x float> @round_nxv16f32(<vscale x 16 x float> %x) {
 declare <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float>)
 
 define <vscale x 1 x double> @round_nxv1f64(<vscale x 1 x double> %x) {
-; CHECK-LABEL: round_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: round_nxv1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT:    vfabs.v v9, v8
+; RV32-NEXT:    vmflt.vf v0, v9, fa5
+; RV32-NEXT:    fsrmi a0, 4
+; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: round_nxv1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT:    vfabs.v v9, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v9, fa5
+; RV64-NEXT:    fsrmi a0, 4
+; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double> %x)
   ret <vscale x 1 x double> %a
 }
 declare <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double>)
 
 define <vscale x 2 x double> @round_nxv2f64(<vscale x 2 x double> %x) {
-; CHECK-LABEL: round_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: round_nxv2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT:    vfabs.v v10, v8
+; RV32-NEXT:    vmflt.vf v0, v10, fa5
+; RV32-NEXT:    fsrmi a0, 4
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: round_nxv2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT:    vfabs.v v10, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v10, fa5
+; RV64-NEXT:    fsrmi a0, 4
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> %x)
   ret <vscale x 2 x double> %a
 }
 declare <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double>)
 
 define <vscale x 4 x double> @round_nxv4f64(<vscale x 4 x double> %x) {
-; CHECK-LABEL: round_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: round_nxv4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vfabs.v v12, v8
+; RV32-NEXT:    vmflt.vf v0, v12, fa5
+; RV32-NEXT:    fsrmi a0, 4
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: round_nxv4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT:    vfabs.v v12, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v12, fa5
+; RV64-NEXT:    fsrmi a0, 4
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double> %x)
   ret <vscale x 4 x double> %a
 }
 declare <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double>)
 
 define <vscale x 8 x double> @round_nxv8f64(<vscale x 8 x double> %x) {
-; CHECK-LABEL: round_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; RV32-LABEL: round_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV32-NEXT:    vfabs.v v16, v8
+; RV32-NEXT:    vmflt.vf v0, v16, fa5
+; RV32-NEXT:    fsrmi a0, 4
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    fsrm a0
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: round_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV64-NEXT:    vfabs.v v16, v8
+; RV64-NEXT:    li a0, 1075
+; RV64-NEXT:    slli a0, a0, 52
+; RV64-NEXT:    fmv.d.x fa5, a0
+; RV64-NEXT:    vmflt.vf v0, v16, fa5
+; RV64-NEXT:    fsrmi a0, 4
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    fsrm a0
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    ret
   %a = call <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double> %x)
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll
index 4ebfcccbaaa6e6c..3d259b1e33f1c3d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll
@@ -11,10 +11,11 @@ define <vscale x 1 x half> @roundeven_nxv1f16(<vscale x 1 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -33,10 +34,11 @@ define <vscale x 2 x half> @roundeven_nxv2f16(<vscale x 2 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -55,10 +57,11 @@ define <vscale x 4 x half> @roundeven_nxv4f16(<vscale x 4 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -77,10 +80,11 @@ define <vscale x 8 x half> @roundeven_nxv8f16(<vscale x 8 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -99,10 +103,11 @@ define <vscale x 16 x half> @roundeven_nxv16f16(<vscale x 16 x half> %x) strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -121,10 +126,11 @@ define <vscale x 32 x half> @roundeven_nxv32f16(<vscale x 32 x half> %x) strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -249,88 +255,24 @@ define <vscale x 16 x float> @roundeven_nxv16f32(<vscale x 16 x float> %x) stric
 declare <vscale x 16 x float> @llvm.experimental.constrained.roundeven.nxv16f32(<vscale x 16 x float>, metadata)
 
 define <vscale x 1 x double> @roundeven_nxv1f64(<vscale x 1 x double> %x) strictfp {
-; CHECK-LABEL: roundeven_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.experimental.constrained.roundeven.nxv1f64(<vscale x 1 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 1 x double> %a
 }
 declare <vscale x 1 x double> @llvm.experimental.constrained.roundeven.nxv1f64(<vscale x 1 x double>, metadata)
 
 define <vscale x 2 x double> @roundeven_nxv2f64(<vscale x 2 x double> %x) strictfp {
-; CHECK-LABEL: roundeven_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 2 x double> @llvm.experimental.constrained.roundeven.nxv2f64(<vscale x 2 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %a
 }
 declare <vscale x 2 x double> @llvm.experimental.constrained.roundeven.nxv2f64(<vscale x 2 x double>, metadata)
 
 define <vscale x 4 x double> @roundeven_nxv4f64(<vscale x 4 x double> %x) strictfp {
-; CHECK-LABEL: roundeven_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.experimental.constrained.roundeven.nxv4f64(<vscale x 4 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %a
 }
 declare <vscale x 4 x double> @llvm.experimental.constrained.roundeven.nxv4f64(<vscale x 4 x double>, metadata)
 
 define <vscale x 8 x double> @roundeven_nxv8f64(<vscale x 8 x double> %x) strictfp {
-; CHECK-LABEL: roundeven_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 8 x double> @llvm.experimental.constrained.roundeven.nxv8f64(<vscale x 8 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll
index 6f5207a25518f50..194fcf3d2aea569 100644
--- a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll
@@ -9,10 +9,11 @@
 define <vscale x 1 x half> @roundeven_nxv1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: roundeven_nxv1f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -29,10 +30,11 @@ declare <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half>)
 define <vscale x 2 x half> @roundeven_nxv2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: roundeven_nxv2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -49,10 +51,11 @@ declare <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half>)
 define <vscale x 4 x half> @roundeven_nxv4f16(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: roundeven_nxv4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -69,10 +72,11 @@ declare <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half>)
 define <vscale x 8 x half> @roundeven_nxv8f16(<vscale x 8 x half> %x) {
 ; CHECK-LABEL: roundeven_nxv8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -89,10 +93,11 @@ declare <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half>)
 define <vscale x 16 x half> @roundeven_nxv16f16(<vscale x 16 x half> %x) {
 ; CHECK-LABEL: roundeven_nxv16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -109,10 +114,11 @@ declare <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half>)
 define <vscale x 32 x half> @roundeven_nxv32f16(<vscale x 32 x half> %x) {
 ; CHECK-LABEL: roundeven_nxv32f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    fsrmi a0, 0
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -227,80 +233,24 @@ define <vscale x 16 x float> @roundeven_nxv16f32(<vscale x 16 x float> %x) {
 declare <vscale x 16 x float> @llvm.roundeven.nxv16f32(<vscale x 16 x float>)
 
 define <vscale x 1 x double> @roundeven_nxv1f64(<vscale x 1 x double> %x) {
-; CHECK-LABEL: roundeven_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.roundeven.nxv1f64(<vscale x 1 x double> %x)
   ret <vscale x 1 x double> %a
 }
 declare <vscale x 1 x double> @llvm.roundeven.nxv1f64(<vscale x 1 x double>)
 
 define <vscale x 2 x double> @roundeven_nxv2f64(<vscale x 2 x double> %x) {
-; CHECK-LABEL: roundeven_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 2 x double> @llvm.roundeven.nxv2f64(<vscale x 2 x double> %x)
   ret <vscale x 2 x double> %a
 }
 declare <vscale x 2 x double> @llvm.roundeven.nxv2f64(<vscale x 2 x double>)
 
 define <vscale x 4 x double> @roundeven_nxv4f64(<vscale x 4 x double> %x) {
-; CHECK-LABEL: roundeven_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.roundeven.nxv4f64(<vscale x 4 x double> %x)
   ret <vscale x 4 x double> %a
 }
 declare <vscale x 4 x double> @llvm.roundeven.nxv4f64(<vscale x 4 x double>)
 
 define <vscale x 8 x double> @roundeven_nxv8f64(<vscale x 8 x double> %x) {
-; CHECK-LABEL: roundeven_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 8 x double> @llvm.roundeven.nxv8f64(<vscale x 8 x double> %x)
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll
index 3665669d83a3d4f..4db1d3234768c03 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll
@@ -9,10 +9,11 @@ define <vscale x 1 x half> @trunc_nxv1f16(<vscale x 1 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -29,10 +30,11 @@ define <vscale x 2 x half> @trunc_nxv2f16(<vscale x 2 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -49,10 +51,11 @@ define <vscale x 4 x half> @trunc_nxv4f16(<vscale x 4 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -69,10 +72,11 @@ define <vscale x 8 x half> @trunc_nxv8f16(<vscale x 8 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
@@ -89,10 +93,11 @@ define <vscale x 16 x half> @trunc_nxv16f16(<vscale x 16 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -109,10 +114,11 @@ define <vscale x 32 x half> @trunc_nxv32f16(<vscale x 32 x half> %x) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
@@ -225,80 +231,24 @@ define <vscale x 16 x float> @trunc_nxv16f32(<vscale x 16 x float> %x) strictfp
 declare <vscale x 16 x float> @llvm.experimental.constrained.trunc.nxv16f32(<vscale x 16 x float>, metadata)
 
 define <vscale x 1 x double> @trunc_nxv1f64(<vscale x 1 x double> %x) strictfp {
-; CHECK-LABEL: trunc_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.experimental.constrained.trunc.nxv1f64(<vscale x 1 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 1 x double> %a
 }
 declare <vscale x 1 x double> @llvm.experimental.constrained.trunc.nxv1f64(<vscale x 1 x double>, metadata)
 
 define <vscale x 2 x double> @trunc_nxv2f64(<vscale x 2 x double> %x) strictfp {
-; CHECK-LABEL: trunc_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 2 x double> @llvm.experimental.constrained.trunc.nxv2f64(<vscale x 2 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %a
 }
 declare <vscale x 2 x double> @llvm.experimental.constrained.trunc.nxv2f64(<vscale x 2 x double>, metadata)
 
 define <vscale x 4 x double> @trunc_nxv4f64(<vscale x 4 x double> %x) strictfp {
-; CHECK-LABEL: trunc_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.experimental.constrained.trunc.nxv4f64(<vscale x 4 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %a
 }
 declare <vscale x 4 x double> @llvm.experimental.constrained.trunc.nxv4f64(<vscale x 4 x double>, metadata)
 
 define <vscale x 8 x double> @trunc_nxv8f64(<vscale x 8 x double> %x) strictfp {
-; CHECK-LABEL: trunc_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 8 x double> @llvm.experimental.constrained.trunc.nxv8f64(<vscale x 8 x double> %x, metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll
index 8841232e7f76dfb..f3e55d9730c1b69 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll
@@ -7,10 +7,11 @@
 define <vscale x 1 x half> @trunc_nxv1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: trunc_nxv1f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -25,10 +26,11 @@ declare <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half>)
 define <vscale x 2 x half> @trunc_nxv2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: trunc_nxv2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -43,10 +45,11 @@ declare <vscale x 2 x half> @llvm.trunc.nxv2f16(<vscale x 2 x half>)
 define <vscale x 4 x half> @trunc_nxv4f16(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: trunc_nxv4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -61,10 +64,11 @@ declare <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half>)
 define <vscale x 8 x half> @trunc_nxv8f16(<vscale x 8 x half> %x) {
 ; CHECK-LABEL: trunc_nxv8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
@@ -79,10 +83,11 @@ declare <vscale x 8 x half> @llvm.trunc.nxv8f16(<vscale x 8 x half>)
 define <vscale x 16 x half> @trunc_nxv16f16(<vscale x 16 x half> %x) {
 ; CHECK-LABEL: trunc_nxv16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfabs.v v12, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -97,10 +102,11 @@ declare <vscale x 16 x half> @llvm.trunc.nxv16f16(<vscale x 16 x half>)
 define <vscale x 32 x half> @trunc_nxv32f16(<vscale x 32 x half> %x) {
 ; CHECK-LABEL: trunc_nxv32f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v16, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v16, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
@@ -203,72 +209,24 @@ define <vscale x 16 x float> @trunc_nxv16f32(<vscale x 16 x float> %x) {
 declare <vscale x 16 x float> @llvm.trunc.nxv16f32(<vscale x 16 x float>)
 
 define <vscale x 1 x double> @trunc_nxv1f64(<vscale x 1 x double> %x) {
-; CHECK-LABEL: trunc_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
   ret <vscale x 1 x double> %a
 }
 declare <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double>)
 
 define <vscale x 2 x double> @trunc_nxv2f64(<vscale x 2 x double> %x) {
-; CHECK-LABEL: trunc_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> %x)
   ret <vscale x 2 x double> %a
 }
 declare <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double>)
 
 define <vscale x 4 x double> @trunc_nxv4f64(<vscale x 4 x double> %x) {
-; CHECK-LABEL: trunc_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
   ret <vscale x 4 x double> %a
 }
 declare <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double>)
 
 define <vscale x 8 x double> @trunc_nxv8f64(<vscale x 8 x double> %x) {
-; CHECK-LABEL: trunc_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %a = call <vscale x 8 x double> @llvm.trunc.nxv8f64(<vscale x 8 x double> %x)
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
index e1c09032459cda9..ef68c2d07b5f2a7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+zfh,+zvfh,+v -verify-machineinstrs < %s | \
-; RUN:   FileCheck %s
+; RUN:   FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+zfh,+zvfh,+v -verify-machineinstrs < %s | \
-; RUN:   FileCheck %s
+; RUN:   FileCheck %s --check-prefixes=CHECK,RV64
 
 ; ================================================================================
 ; trunc <vscale x 1 x half>
@@ -83,10 +83,11 @@ define <vscale x 1 x i32> @trunc_nxv1f16_to_ui32(<vscale x 1 x half> %x) {
 define <vscale x 1 x i64> @trunc_nxv1f16_to_si64(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: trunc_nxv1f16_to_si64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -104,10 +105,11 @@ define <vscale x 1 x i64> @trunc_nxv1f16_to_si64(<vscale x 1 x half> %x) {
 define <vscale x 1 x i64> @trunc_nxv1f16_to_ui64(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: trunc_nxv1f16_to_ui64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI7_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -201,10 +203,11 @@ define <vscale x 4 x i32> @trunc_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
 define <vscale x 4 x i64> @trunc_nxv4f16_to_si64(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: trunc_nxv4f16_to_si64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI14_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -222,10 +225,11 @@ define <vscale x 4 x i64> @trunc_nxv4f16_to_si64(<vscale x 4 x half> %x) {
 define <vscale x 4 x i64> @trunc_nxv4f16_to_ui64(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: trunc_nxv4f16_to_ui64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI15_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI15_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -331,10 +335,11 @@ define <vscale x 1 x i32> @ceil_nxv1f16_to_ui32(<vscale x 1 x half> %x) {
 define <vscale x 1 x i64> @ceil_nxv1f16_to_si64(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: ceil_nxv1f16_to_si64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI22_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -346,40 +351,6 @@ define <vscale x 1 x i64> @ceil_nxv1f16_to_si64(<vscale x 1 x half> %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
-; RV32-LABEL: ceil_nxv1f16_to_si64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI22_0)
-; RV32-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, fa5
-; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfwcvt.rtz.x.f.v v8, v9
-; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    ret
-; RV64-LABEL: ceil_nxv1f16_to_si64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI22_0)
-; RV64-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, fa5
-; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfwcvt.rtz.x.f.v v8, v9
-; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
   %b = fptosi <vscale x 1 x half> %a to <vscale x 1 x i64>
   ret <vscale x 1 x i64> %b
@@ -388,10 +359,11 @@ define <vscale x 1 x i64> @ceil_nxv1f16_to_si64(<vscale x 1 x half> %x) {
 define <vscale x 1 x i64> @ceil_nxv1f16_to_ui64(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: ceil_nxv1f16_to_ui64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI23_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI23_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -403,40 +375,6 @@ define <vscale x 1 x i64> @ceil_nxv1f16_to_ui64(<vscale x 1 x half> %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
-; RV32-LABEL: ceil_nxv1f16_to_ui64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV32-NEXT:    flh fa5, %lo(.LCPI23_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, fa5
-; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
-; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    ret
-; RV64-LABEL: ceil_nxv1f16_to_ui64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV64-NEXT:    flh fa5, %lo(.LCPI23_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, fa5
-; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
-; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
   %b = fptoui <vscale x 1 x half> %a to <vscale x 1 x i64>
   ret <vscale x 1 x i64> %b
@@ -533,10 +471,11 @@ define <vscale x 4 x i32> @ceil_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
 define <vscale x 4 x i64> @ceil_nxv4f16_to_si64(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: ceil_nxv4f16_to_si64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI30_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI30_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -548,40 +487,6 @@ define <vscale x 4 x i64> @ceil_nxv4f16_to_si64(<vscale x 4 x half> %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
 ; CHECK-NEXT:    ret
-; RV32-LABEL: ceil_nxv4f16_to_si64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI30_0)
-; RV32-NEXT:    flh fa5, %lo(.LCPI30_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, fa5
-; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.f.f.v v12, v8
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfwcvt.rtz.x.f.v v8, v12
-; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    ret
-; RV64-LABEL: ceil_nxv4f16_to_si64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI30_0)
-; RV64-NEXT:    flh fa5, %lo(.LCPI30_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, fa5
-; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.f.f.v v12, v8
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfwcvt.rtz.x.f.v v8, v12
-; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
   %b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %b
@@ -590,10 +495,11 @@ define <vscale x 4 x i64> @ceil_nxv4f16_to_si64(<vscale x 4 x half> %x) {
 define <vscale x 4 x i64> @ceil_nxv4f16_to_ui64(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: ceil_nxv4f16_to_ui64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI31_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI31_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -605,40 +511,6 @@ define <vscale x 4 x i64> @ceil_nxv4f16_to_ui64(<vscale x 4 x half> %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
 ; CHECK-NEXT:    ret
-; RV32-LABEL: ceil_nxv4f16_to_ui64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI31_0)
-; RV32-NEXT:    flh fa5, %lo(.LCPI31_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, fa5
-; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.f.f.v v12, v8
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
-; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    ret
-; RV64-LABEL: ceil_nxv4f16_to_ui64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI31_0)
-; RV64-NEXT:    flh fa5, %lo(.LCPI31_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, fa5
-; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.f.f.v v12, v8
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
-; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
   %b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %b
@@ -653,10 +525,11 @@ declare <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half>)
 define <vscale x 1 x i8> @rint_nxv1f16_to_si8(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: rint_nxv1f16_to_si8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI32_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI32_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -674,10 +547,11 @@ define <vscale x 1 x i8> @rint_nxv1f16_to_si8(<vscale x 1 x half> %x) {
 define <vscale x 1 x i8> @rint_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: rint_nxv1f16_to_ui8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI33_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI33_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -695,10 +569,11 @@ define <vscale x 1 x i8> @rint_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
 define <vscale x 1 x i16> @rint_nxv1f16_to_si16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: rint_nxv1f16_to_si16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI34_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI34_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -714,10 +589,11 @@ define <vscale x 1 x i16> @rint_nxv1f16_to_si16(<vscale x 1 x half> %x) {
 define <vscale x 1 x i16> @rint_nxv1f16_to_ui16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: rint_nxv1f16_to_ui16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI35_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI35_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -733,10 +609,11 @@ define <vscale x 1 x i16> @rint_nxv1f16_to_ui16(<vscale x 1 x half> %x) {
 define <vscale x 1 x i32> @rint_nxv1f16_to_si32(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: rint_nxv1f16_to_si32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI36_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI36_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -753,10 +630,11 @@ define <vscale x 1 x i32> @rint_nxv1f16_to_si32(<vscale x 1 x half> %x) {
 define <vscale x 1 x i32> @rint_nxv1f16_to_ui32(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: rint_nxv1f16_to_ui32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI37_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI37_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -773,10 +651,11 @@ define <vscale x 1 x i32> @rint_nxv1f16_to_ui32(<vscale x 1 x half> %x) {
 define <vscale x 1 x i64> @rint_nxv1f16_to_si64(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: rint_nxv1f16_to_si64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI38_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI38_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -786,40 +665,6 @@ define <vscale x 1 x i64> @rint_nxv1f16_to_si64(<vscale x 1 x half> %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
-; RV32-LABEL: rint_nxv1f16_to_si64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI22_0)
-; RV32-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, fa5
-; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfwcvt.rtz.x.f.v v8, v9
-; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    ret
-; RV64-LABEL: rint_nxv1f16_to_si64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI22_0)
-; RV64-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, fa5
-; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfwcvt.rtz.x.f.v v8, v9
-; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half> %x)
   %b = fptosi <vscale x 1 x half> %a to <vscale x 1 x i64>
   ret <vscale x 1 x i64> %b
@@ -828,10 +673,11 @@ define <vscale x 1 x i64> @rint_nxv1f16_to_si64(<vscale x 1 x half> %x) {
 define <vscale x 1 x i64> @rint_nxv1f16_to_ui64(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: rint_nxv1f16_to_ui64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI39_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI39_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -841,40 +687,6 @@ define <vscale x 1 x i64> @rint_nxv1f16_to_ui64(<vscale x 1 x half> %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
-; RV32-LABEL: rint_nxv1f16_to_ui64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV32-NEXT:    flh fa5, %lo(.LCPI23_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, fa5
-; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
-; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    ret
-; RV64-LABEL: rint_nxv1f16_to_ui64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV64-NEXT:    flh fa5, %lo(.LCPI23_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, fa5
-; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
-; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half> %x)
   %b = fptoui <vscale x 1 x half> %a to <vscale x 1 x i64>
   ret <vscale x 1 x i64> %b
@@ -889,10 +701,11 @@ declare <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half>)
 define <vscale x 4 x i8> @rint_nxv4f16_to_si8(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: rint_nxv4f16_to_si8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI40_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI40_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -910,10 +723,11 @@ define <vscale x 4 x i8> @rint_nxv4f16_to_si8(<vscale x 4 x half> %x) {
 define <vscale x 4 x i8> @rint_nxv4f16_to_ui8(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: rint_nxv4f16_to_ui8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI41_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI41_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -931,10 +745,11 @@ define <vscale x 4 x i8> @rint_nxv4f16_to_ui8(<vscale x 4 x half> %x) {
 define <vscale x 4 x i16> @rint_nxv4f16_to_si16(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: rint_nxv4f16_to_si16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI42_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI42_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -950,10 +765,11 @@ define <vscale x 4 x i16> @rint_nxv4f16_to_si16(<vscale x 4 x half> %x) {
 define <vscale x 4 x i16> @rint_nxv4f16_to_ui16(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: rint_nxv4f16_to_ui16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI43_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI43_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -969,10 +785,11 @@ define <vscale x 4 x i16> @rint_nxv4f16_to_ui16(<vscale x 4 x half> %x) {
 define <vscale x 4 x i32> @rint_nxv4f16_to_si32(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: rint_nxv4f16_to_si32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI44_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI44_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -989,10 +806,11 @@ define <vscale x 4 x i32> @rint_nxv4f16_to_si32(<vscale x 4 x half> %x) {
 define <vscale x 4 x i32> @rint_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: rint_nxv4f16_to_ui32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI45_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI45_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -1009,10 +827,11 @@ define <vscale x 4 x i32> @rint_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
 define <vscale x 4 x i64> @rint_nxv4f16_to_si64(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: rint_nxv4f16_to_si64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI46_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI46_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -1022,40 +841,6 @@ define <vscale x 4 x i64> @rint_nxv4f16_to_si64(<vscale x 4 x half> %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
 ; CHECK-NEXT:    ret
-; RV32-LABEL: rint_nxv4f16_to_si64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI30_0)
-; RV32-NEXT:    flh fa5, %lo(.LCPI30_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, fa5
-; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.f.f.v v12, v8
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfwcvt.rtz.x.f.v v8, v12
-; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    ret
-; RV64-LABEL: rint_nxv4f16_to_si64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI30_0)
-; RV64-NEXT:    flh fa5, %lo(.LCPI30_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, fa5
-; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.f.f.v v12, v8
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfwcvt.rtz.x.f.v v8, v12
-; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> %x)
   %b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %b
@@ -1064,10 +849,11 @@ define <vscale x 4 x i64> @rint_nxv4f16_to_si64(<vscale x 4 x half> %x) {
 define <vscale x 4 x i64> @rint_nxv4f16_to_ui64(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: rint_nxv4f16_to_ui64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI47_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI47_0)(a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    li a0, 25
+; CHECK-NEXT:    slli a0, a0, 10
+; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v9, fa5
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -1077,41 +863,10 @@ define <vscale x 4 x i64> @rint_nxv4f16_to_ui64(<vscale x 4 x half> %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
 ; CHECK-NEXT:    ret
-; RV32-LABEL: rint_nxv4f16_to_ui64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI31_0)
-; RV32-NEXT:    flh fa5, %lo(.LCPI31_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, fa5
-; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.f.f.v v12, v8
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
-; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    ret
-; RV64-LABEL: rint_nxv4f16_to_ui64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI31_0)
-; RV64-NEXT:    flh fa5, %lo(.LCPI31_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, fa5
-; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.f.f.v v12, v8
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
-; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> %x)
   %b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %b
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
index 7c354c3714c6f44..694013bd85f5bbf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
@@ -13,10 +13,11 @@ declare <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half>, <vsc
 define <vscale x 1 x half> @vp_nearbyint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv1f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
@@ -55,10 +56,11 @@ define <vscale x 1 x half> @vp_nearbyint_nxv1f16(<vscale x 1 x half> %va, <vscal
 define <vscale x 1 x half> @vp_nearbyint_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv1f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    frflags a0
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +99,11 @@ declare <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half>, <vsc
 define <vscale x 2 x half> @vp_nearbyint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv2f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
@@ -139,10 +142,11 @@ define <vscale x 2 x half> @vp_nearbyint_nxv2f16(<vscale x 2 x half> %va, <vscal
 define <vscale x 2 x half> @vp_nearbyint_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv2f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    frflags a0
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -181,10 +185,11 @@ declare <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half>, <vsc
 define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv4f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
@@ -225,10 +230,11 @@ define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscal
 define <vscale x 4 x half> @vp_nearbyint_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv4f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    frflags a0
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -268,10 +274,11 @@ define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscal
 ; ZVFH-LABEL: vp_nearbyint_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
@@ -313,10 +320,11 @@ define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscal
 define <vscale x 8 x half> @vp_nearbyint_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv8f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v10, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
 ; ZVFH-NEXT:    frflags a0
 ; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -356,10 +364,11 @@ define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vs
 ; ZVFH-LABEL: vp_nearbyint_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI8_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
@@ -401,10 +410,11 @@ define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vs
 define <vscale x 16 x half> @vp_nearbyint_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv16f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI9_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI9_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
 ; ZVFH-NEXT:    frflags a0
 ; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -444,10 +454,11 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs
 ; ZVFH-LABEL: vp_nearbyint_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI10_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
@@ -533,10 +544,11 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs
 define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv32f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI11_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI11_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vfabs.v v16, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v16, fa5
 ; ZVFH-NEXT:    frflags a0
 ; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -844,41 +856,11 @@ define <vscale x 16 x float> @vp_nearbyint_nxv16f32_unmasked(<vscale x 16 x floa
 declare <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x double> @vp_nearbyint_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x double> %v
 }
 
 define <vscale x 1 x double> @vp_nearbyint_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_nxv1f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -888,43 +870,11 @@ define <vscale x 1 x double> @vp_nearbyint_nxv1f64_unmasked(<vscale x 1 x double
 declare <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x double> @vp_nearbyint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
 }
 
 define <vscale x 2 x double> @vp_nearbyint_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_nxv2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -934,43 +884,11 @@ define <vscale x 2 x double> @vp_nearbyint_nxv2f64_unmasked(<vscale x 2 x double
 declare <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x double> @vp_nearbyint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
 }
 
 define <vscale x 4 x double> @vp_nearbyint_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_nxv4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -980,43 +898,11 @@ define <vscale x 4 x double> @vp_nearbyint_nxv4f64_unmasked(<vscale x 4 x double
 declare <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
 
 define <vscale x 7 x double> @vp_nearbyint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_nxv7f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
 }
 
 define <vscale x 7 x double> @vp_nearbyint_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_nxv7f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI29_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
@@ -1026,43 +912,11 @@ define <vscale x 7 x double> @vp_nearbyint_nxv7f64_unmasked(<vscale x 7 x double
 declare <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
 }
 
 define <vscale x 8 x double> @vp_nearbyint_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_nxv8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI31_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -1073,122 +927,11 @@ define <vscale x 8 x double> @vp_nearbyint_nxv8f64_unmasked(<vscale x 8 x double
 declare <vscale x 16 x double> @llvm.vp.nearbyint.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_nxv16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v24, v0
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    srli a2, a1, 3
-; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    vfabs.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
-; CHECK-NEXT:    frflags a2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v8, v16, v0.t
-; CHECK-NEXT:    fsflags a2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    bltu a0, a1, .LBB32_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB32_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
   %v = call <vscale x 16 x double> @llvm.vp.nearbyint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x double> %v
 }
 
 define <vscale x 16 x double> @vp_nearbyint_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_nearbyint_nxv16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI33_0)(a3)
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    frflags a2
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    fsflags a2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    bltu a0, a1, .LBB33_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB33_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    fsflags a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x double> @llvm.vp.nearbyint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
index 04761d4e7bfc4af..b21f93685004b45 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
@@ -1,22 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH-RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64
 
 declare <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x half> @vp_rint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv1f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
@@ -51,10 +52,11 @@ define <vscale x 1 x half> @vp_rint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1
 define <vscale x 1 x half> @vp_rint_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv1f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -89,10 +91,11 @@ declare <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half>, <vscale x
 define <vscale x 2 x half> @vp_rint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv2f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
@@ -127,10 +130,11 @@ define <vscale x 2 x half> @vp_rint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2
 define <vscale x 2 x half> @vp_rint_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv2f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -165,10 +169,11 @@ declare <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half>, <vscale x
 define <vscale x 4 x half> @vp_rint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv4f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -205,10 +210,11 @@ define <vscale x 4 x half> @vp_rint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4
 define <vscale x 4 x half> @vp_rint_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv4f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
@@ -244,10 +250,11 @@ define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8
 ; ZVFH-LABEL: vp_rint_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -285,10 +292,11 @@ define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8
 define <vscale x 8 x half> @vp_rint_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv8f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v10, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
 ; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
@@ -324,10 +332,11 @@ define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFH-LABEL: vp_rint_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI8_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -365,10 +374,11 @@ define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale
 define <vscale x 16 x half> @vp_rint_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv16f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI9_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI9_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
 ; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
@@ -404,10 +414,11 @@ define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFH-LABEL: vp_rint_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI10_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
@@ -488,10 +499,11 @@ define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale
 define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv32f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI11_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI11_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vfabs.v v16, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v16, fa5
 ; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
@@ -773,37 +785,125 @@ define <vscale x 16 x float> @vp_rint_nxv16f32_unmasked(<vscale x 16 x float> %v
 declare <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x double> @vp_rint_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_rint_nxv1f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_rint_nxv1f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_rint_nxv1f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_rint_nxv1f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v9, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x double> %v
 }
 
 define <vscale x 1 x double> @vp_rint_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_nxv1f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_rint_nxv1f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI23_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v9, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_rint_nxv1f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v9, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_rint_nxv1f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI23_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_rint_nxv1f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v9, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v9, fa5
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -813,39 +913,133 @@ define <vscale x 1 x double> @vp_rint_nxv1f64_unmasked(<vscale x 1 x double> %va
 declare <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_rint_nxv2f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    vmv1r.v v10, v0
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI24_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v10
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_rint_nxv2f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vmv1r.v v10, v0
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v10
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_rint_nxv2f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI24_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_rint_nxv2f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
 }
 
 define <vscale x 2 x double> @vp_rint_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_nxv2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_rint_nxv2f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI25_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v10, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_rint_nxv2f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v10, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_rint_nxv2f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI25_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v10, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_rint_nxv2f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v10, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v10, fa5
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -855,39 +1049,133 @@ define <vscale x 2 x double> @vp_rint_nxv2f64_unmasked(<vscale x 2 x double> %va
 declare <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_rint_nxv4f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    vmv1r.v v12, v0
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI26_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v12
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_rint_nxv4f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vmv1r.v v12, v0
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v12
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_rint_nxv4f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v12, v0
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI26_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_rint_nxv4f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v12, v0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
 }
 
 define <vscale x 4 x double> @vp_rint_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_nxv4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_rint_nxv4f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI27_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI27_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v12, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_rint_nxv4f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v12, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_rint_nxv4f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI27_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI27_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v12, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_rint_nxv4f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v12, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v12, fa5
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -897,39 +1185,133 @@ define <vscale x 4 x double> @vp_rint_nxv4f64_unmasked(<vscale x 4 x double> %va
 declare <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
 
 define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_nxv7f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_rint_nxv7f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    vmv1r.v v16, v0
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI28_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v16
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_rint_nxv7f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vmv1r.v v16, v0
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v16
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_rint_nxv7f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v16, v0
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI28_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_rint_nxv7f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v16, v0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
 }
 
 define <vscale x 7 x double> @vp_rint_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_nxv7f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI29_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_rint_nxv7f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI29_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI29_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v16, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_rint_nxv7f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v16, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_rint_nxv7f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI29_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI29_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v16, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_rint_nxv7f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v16, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
@@ -939,39 +1321,133 @@ define <vscale x 7 x double> @vp_rint_nxv7f64_unmasked(<vscale x 7 x double> %va
 declare <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_rint_nxv8f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    vmv1r.v v16, v0
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI30_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v16
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_rint_nxv8f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vmv1r.v v16, v0
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v16
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_rint_nxv8f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v16, v0
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI30_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_rint_nxv8f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v16, v0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
 }
 
 define <vscale x 8 x double> @vp_rint_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_nxv8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI31_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_rint_nxv8f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    lui a1, %hi(.LCPI31_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI31_0)(a1)
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v16, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_rint_nxv8f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v16, v8
+; ZVFH-RV64-NEXT:    li a0, 1075
+; ZVFH-RV64-NEXT:    slli a0, a0, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_rint_nxv8f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    lui a1, %hi(.LCPI31_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI31_0)(a1)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v16, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_rint_nxv8f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v16, v8
+; ZVFHMIN-RV64-NEXT:    li a0, 1075
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a0
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v16, fa5
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -982,103 +1458,389 @@ define <vscale x 8 x double> @vp_rint_nxv8f64_unmasked(<vscale x 8 x double> %va
 declare <vscale x 16 x double> @llvm.vp.rint.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_nxv16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v24, v0
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    srli a2, a1, 3
-; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfabs.v v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    bltu a0, a1, .LBB32_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB32_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_rint_nxv16f64:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    addi sp, sp, -16
+; ZVFH-RV32-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-RV32-NEXT:    csrr a1, vlenb
+; ZVFH-RV32-NEXT:    slli a1, a1, 4
+; ZVFH-RV32-NEXT:    sub sp, sp, a1
+; ZVFH-RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFH-RV32-NEXT:    vmv1r.v v24, v0
+; ZVFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFH-RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVFH-RV32-NEXT:    csrr a1, vlenb
+; ZVFH-RV32-NEXT:    srli a2, a1, 3
+; ZVFH-RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; ZVFH-RV32-NEXT:    vslidedown.vx v25, v0, a2
+; ZVFH-RV32-NEXT:    sub a2, a0, a1
+; ZVFH-RV32-NEXT:    sltu a3, a0, a2
+; ZVFH-RV32-NEXT:    addi a3, a3, -1
+; ZVFH-RV32-NEXT:    and a2, a3, a2
+; ZVFH-RV32-NEXT:    lui a3, %hi(.LCPI32_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
+; ZVFH-RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v25
+; ZVFH-RV32-NEXT:    vfabs.v v8, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v25
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFH-RV32-NEXT:    csrr a2, vlenb
+; ZVFH-RV32-NEXT:    slli a2, a2, 3
+; ZVFH-RV32-NEXT:    add a2, sp, a2
+; ZVFH-RV32-NEXT:    addi a2, a2, 16
+; ZVFH-RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFH-RV32-NEXT:    bltu a0, a1, .LBB32_2
+; ZVFH-RV32-NEXT:  # %bb.1:
+; ZVFH-RV32-NEXT:    mv a0, a1
+; ZVFH-RV32-NEXT:  .LBB32_2:
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v24
+; ZVFH-RV32-NEXT:    addi a0, sp, 16
+; ZVFH-RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFH-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vmv1r.v v0, v24
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV32-NEXT:    csrr a0, vlenb
+; ZVFH-RV32-NEXT:    slli a0, a0, 3
+; ZVFH-RV32-NEXT:    add a0, sp, a0
+; ZVFH-RV32-NEXT:    addi a0, a0, 16
+; ZVFH-RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-RV32-NEXT:    csrr a0, vlenb
+; ZVFH-RV32-NEXT:    slli a0, a0, 4
+; ZVFH-RV32-NEXT:    add sp, sp, a0
+; ZVFH-RV32-NEXT:    addi sp, sp, 16
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_rint_nxv16f64:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    addi sp, sp, -16
+; ZVFH-RV64-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-RV64-NEXT:    csrr a1, vlenb
+; ZVFH-RV64-NEXT:    slli a1, a1, 4
+; ZVFH-RV64-NEXT:    sub sp, sp, a1
+; ZVFH-RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFH-RV64-NEXT:    vmv1r.v v24, v0
+; ZVFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFH-RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVFH-RV64-NEXT:    csrr a1, vlenb
+; ZVFH-RV64-NEXT:    srli a2, a1, 3
+; ZVFH-RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; ZVFH-RV64-NEXT:    vslidedown.vx v25, v0, a2
+; ZVFH-RV64-NEXT:    sub a2, a0, a1
+; ZVFH-RV64-NEXT:    sltu a3, a0, a2
+; ZVFH-RV64-NEXT:    addi a3, a3, -1
+; ZVFH-RV64-NEXT:    and a2, a3, a2
+; ZVFH-RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v25
+; ZVFH-RV64-NEXT:    vfabs.v v8, v16, v0.t
+; ZVFH-RV64-NEXT:    li a2, 1075
+; ZVFH-RV64-NEXT:    slli a2, a2, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a2
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v25
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFH-RV64-NEXT:    csrr a2, vlenb
+; ZVFH-RV64-NEXT:    slli a2, a2, 3
+; ZVFH-RV64-NEXT:    add a2, sp, a2
+; ZVFH-RV64-NEXT:    addi a2, a2, 16
+; ZVFH-RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFH-RV64-NEXT:    bltu a0, a1, .LBB32_2
+; ZVFH-RV64-NEXT:  # %bb.1:
+; ZVFH-RV64-NEXT:    mv a0, a1
+; ZVFH-RV64-NEXT:  .LBB32_2:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v24
+; ZVFH-RV64-NEXT:    addi a0, sp, 16
+; ZVFH-RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFH-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vmv1r.v v0, v24
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-RV64-NEXT:    csrr a0, vlenb
+; ZVFH-RV64-NEXT:    slli a0, a0, 3
+; ZVFH-RV64-NEXT:    add a0, sp, a0
+; ZVFH-RV64-NEXT:    addi a0, a0, 16
+; ZVFH-RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-RV64-NEXT:    csrr a0, vlenb
+; ZVFH-RV64-NEXT:    slli a0, a0, 4
+; ZVFH-RV64-NEXT:    add sp, sp, a0
+; ZVFH-RV64-NEXT:    addi sp, sp, 16
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_rint_nxv16f64:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    addi sp, sp, -16
+; ZVFHMIN-RV32-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-RV32-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV32-NEXT:    slli a1, a1, 4
+; ZVFHMIN-RV32-NEXT:    sub sp, sp, a1
+; ZVFHMIN-RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v24, v0
+; ZVFHMIN-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-RV32-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV32-NEXT:    srli a2, a1, 3
+; ZVFHMIN-RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vx v25, v0, a2
+; ZVFHMIN-RV32-NEXT:    sub a2, a0, a1
+; ZVFHMIN-RV32-NEXT:    sltu a3, a0, a2
+; ZVFHMIN-RV32-NEXT:    addi a3, a3, -1
+; ZVFHMIN-RV32-NEXT:    and a2, a3, a2
+; ZVFHMIN-RV32-NEXT:    lui a3, %hi(.LCPI32_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v25
+; ZVFHMIN-RV32-NEXT:    vfabs.v v8, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v25
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    csrr a2, vlenb
+; ZVFHMIN-RV32-NEXT:    slli a2, a2, 3
+; ZVFHMIN-RV32-NEXT:    add a2, sp, a2
+; ZVFHMIN-RV32-NEXT:    addi a2, a2, 16
+; ZVFHMIN-RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-RV32-NEXT:    bltu a0, a1, .LBB32_2
+; ZVFHMIN-RV32-NEXT:  # %bb.1:
+; ZVFHMIN-RV32-NEXT:    mv a0, a1
+; ZVFHMIN-RV32-NEXT:  .LBB32_2:
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v24
+; ZVFHMIN-RV32-NEXT:    addi a0, sp, 16
+; ZVFHMIN-RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-RV32-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vmv1r.v v0, v24
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    csrr a0, vlenb
+; ZVFHMIN-RV32-NEXT:    slli a0, a0, 3
+; ZVFHMIN-RV32-NEXT:    add a0, sp, a0
+; ZVFHMIN-RV32-NEXT:    addi a0, a0, 16
+; ZVFHMIN-RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-RV32-NEXT:    csrr a0, vlenb
+; ZVFHMIN-RV32-NEXT:    slli a0, a0, 4
+; ZVFHMIN-RV32-NEXT:    add sp, sp, a0
+; ZVFHMIN-RV32-NEXT:    addi sp, sp, 16
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_rint_nxv16f64:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    addi sp, sp, -16
+; ZVFHMIN-RV64-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-RV64-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV64-NEXT:    slli a1, a1, 4
+; ZVFHMIN-RV64-NEXT:    sub sp, sp, a1
+; ZVFHMIN-RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v24, v0
+; ZVFHMIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-RV64-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV64-NEXT:    srli a2, a1, 3
+; ZVFHMIN-RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vslidedown.vx v25, v0, a2
+; ZVFHMIN-RV64-NEXT:    sub a2, a0, a1
+; ZVFHMIN-RV64-NEXT:    sltu a3, a0, a2
+; ZVFHMIN-RV64-NEXT:    addi a3, a3, -1
+; ZVFHMIN-RV64-NEXT:    and a2, a3, a2
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v25
+; ZVFHMIN-RV64-NEXT:    vfabs.v v8, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    li a2, 1075
+; ZVFHMIN-RV64-NEXT:    slli a2, a2, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a2
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v25
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    csrr a2, vlenb
+; ZVFHMIN-RV64-NEXT:    slli a2, a2, 3
+; ZVFHMIN-RV64-NEXT:    add a2, sp, a2
+; ZVFHMIN-RV64-NEXT:    addi a2, a2, 16
+; ZVFHMIN-RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-RV64-NEXT:    bltu a0, a1, .LBB32_2
+; ZVFHMIN-RV64-NEXT:  # %bb.1:
+; ZVFHMIN-RV64-NEXT:    mv a0, a1
+; ZVFHMIN-RV64-NEXT:  .LBB32_2:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v24
+; ZVFHMIN-RV64-NEXT:    addi a0, sp, 16
+; ZVFHMIN-RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-RV64-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vmv1r.v v0, v24
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    csrr a0, vlenb
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 3
+; ZVFHMIN-RV64-NEXT:    add a0, sp, a0
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 16
+; ZVFHMIN-RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-RV64-NEXT:    csrr a0, vlenb
+; ZVFHMIN-RV64-NEXT:    slli a0, a0, 4
+; ZVFHMIN-RV64-NEXT:    add sp, sp, a0
+; ZVFHMIN-RV64-NEXT:    addi sp, sp, 16
+; ZVFHMIN-RV64-NEXT:    ret
   %v = call <vscale x 16 x double> @llvm.vp.rint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x double> %v
 }
 
 define <vscale x 16 x double> @vp_rint_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_rint_nxv16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI33_0)(a3)
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    bltu a0, a1, .LBB33_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB33_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
+; ZVFH-RV32-LABEL: vp_rint_nxv16f64_unmasked:
+; ZVFH-RV32:       # %bb.0:
+; ZVFH-RV32-NEXT:    csrr a1, vlenb
+; ZVFH-RV32-NEXT:    sub a2, a0, a1
+; ZVFH-RV32-NEXT:    lui a3, %hi(.LCPI33_0)
+; ZVFH-RV32-NEXT:    fld fa5, %lo(.LCPI33_0)(a3)
+; ZVFH-RV32-NEXT:    sltu a3, a0, a2
+; ZVFH-RV32-NEXT:    addi a3, a3, -1
+; ZVFH-RV32-NEXT:    and a2, a3, a2
+; ZVFH-RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v24, v16
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFH-RV32-NEXT:    bltu a0, a1, .LBB33_2
+; ZVFH-RV32-NEXT:  # %bb.1:
+; ZVFH-RV32-NEXT:    mv a0, a1
+; ZVFH-RV32-NEXT:  .LBB33_2:
+; ZVFH-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV32-NEXT:    vfabs.v v24, v8
+; ZVFH-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFH-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV32-NEXT:    ret
+;
+; ZVFH-RV64-LABEL: vp_rint_nxv16f64_unmasked:
+; ZVFH-RV64:       # %bb.0:
+; ZVFH-RV64-NEXT:    csrr a1, vlenb
+; ZVFH-RV64-NEXT:    sub a2, a0, a1
+; ZVFH-RV64-NEXT:    sltu a3, a0, a2
+; ZVFH-RV64-NEXT:    addi a3, a3, -1
+; ZVFH-RV64-NEXT:    and a2, a3, a2
+; ZVFH-RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v24, v16
+; ZVFH-RV64-NEXT:    li a2, 1075
+; ZVFH-RV64-NEXT:    slli a2, a2, 52
+; ZVFH-RV64-NEXT:    fmv.d.x fa5, a2
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFH-RV64-NEXT:    bltu a0, a1, .LBB33_2
+; ZVFH-RV64-NEXT:  # %bb.1:
+; ZVFH-RV64-NEXT:    mv a0, a1
+; ZVFH-RV64-NEXT:  .LBB33_2:
+; ZVFH-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFH-RV64-NEXT:    vfabs.v v24, v8
+; ZVFH-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFH-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFH-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-RV32-LABEL: vp_rint_nxv16f64_unmasked:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV32-NEXT:    sub a2, a0, a1
+; ZVFHMIN-RV32-NEXT:    lui a3, %hi(.LCPI33_0)
+; ZVFHMIN-RV32-NEXT:    fld fa5, %lo(.LCPI33_0)(a3)
+; ZVFHMIN-RV32-NEXT:    sltu a3, a0, a2
+; ZVFHMIN-RV32-NEXT:    addi a3, a3, -1
+; ZVFHMIN-RV32-NEXT:    and a2, a3, a2
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v24, v16
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-RV32-NEXT:    bltu a0, a1, .LBB33_2
+; ZVFHMIN-RV32-NEXT:  # %bb.1:
+; ZVFHMIN-RV32-NEXT:    mv a0, a1
+; ZVFHMIN-RV32-NEXT:  .LBB33_2:
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v24, v8
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: vp_rint_nxv16f64_unmasked:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    csrr a1, vlenb
+; ZVFHMIN-RV64-NEXT:    sub a2, a0, a1
+; ZVFHMIN-RV64-NEXT:    sltu a3, a0, a2
+; ZVFHMIN-RV64-NEXT:    addi a3, a3, -1
+; ZVFHMIN-RV64-NEXT:    and a2, a3, a2
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v24, v16
+; ZVFHMIN-RV64-NEXT:    li a2, 1075
+; ZVFHMIN-RV64-NEXT:    slli a2, a2, 52
+; ZVFHMIN-RV64-NEXT:    fmv.d.x fa5, a2
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-RV64-NEXT:    bltu a0, a1, .LBB33_2
+; ZVFHMIN-RV64-NEXT:  # %bb.1:
+; ZVFHMIN-RV64-NEXT:    mv a0, a1
+; ZVFHMIN-RV64-NEXT:  .LBB33_2:
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v24, v8
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v24, fa5
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x double> @llvm.vp.rint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
index dbc81035c169e5e..84110accd95a21c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
@@ -13,10 +13,11 @@ declare <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half>, <vscale
 define <vscale x 1 x half> @vp_round_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv1f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
@@ -55,10 +56,11 @@ define <vscale x 1 x half> @vp_round_nxv1f16(<vscale x 1 x half> %va, <vscale x
 define <vscale x 1 x half> @vp_round_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv1f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +99,11 @@ declare <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half>, <vscale
 define <vscale x 2 x half> @vp_round_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv2f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
@@ -139,10 +142,11 @@ define <vscale x 2 x half> @vp_round_nxv2f16(<vscale x 2 x half> %va, <vscale x
 define <vscale x 2 x half> @vp_round_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv2f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -181,10 +185,11 @@ declare <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half>, <vscale
 define <vscale x 4 x half> @vp_round_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv4f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
@@ -225,10 +230,11 @@ define <vscale x 4 x half> @vp_round_nxv4f16(<vscale x 4 x half> %va, <vscale x
 define <vscale x 4 x half> @vp_round_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv4f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -268,10 +274,11 @@ define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: vp_round_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
@@ -313,10 +320,11 @@ define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x
 define <vscale x 8 x half> @vp_round_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv8f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v10, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -356,10 +364,11 @@ define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFH-LABEL: vp_round_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI8_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
@@ -401,10 +410,11 @@ define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale
 define <vscale x 16 x half> @vp_round_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv16f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI9_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI9_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -444,10 +454,11 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFH-LABEL: vp_round_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI10_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
@@ -534,10 +545,11 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale
 define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv32f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI11_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI11_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vfabs.v v16, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v16, fa5
 ; ZVFH-NEXT:    fsrmi a0, 4
 ; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -845,41 +857,11 @@ define <vscale x 16 x float> @vp_round_nxv16f32_unmasked(<vscale x 16 x float> %
 declare <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x double> @vp_round_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x double> %v
 }
 
 define <vscale x 1 x double> @vp_round_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_nxv1f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -889,43 +871,11 @@ define <vscale x 1 x double> @vp_round_nxv1f64_unmasked(<vscale x 1 x double> %v
 declare <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x double> @vp_round_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
 }
 
 define <vscale x 2 x double> @vp_round_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_nxv2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -935,43 +885,11 @@ define <vscale x 2 x double> @vp_round_nxv2f64_unmasked(<vscale x 2 x double> %v
 declare <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x double> @vp_round_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
 }
 
 define <vscale x 4 x double> @vp_round_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_nxv4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -981,43 +899,11 @@ define <vscale x 4 x double> @vp_round_nxv4f64_unmasked(<vscale x 4 x double> %v
 declare <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
 
 define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_nxv7f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
 }
 
 define <vscale x 7 x double> @vp_round_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_nxv7f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI29_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
@@ -1027,43 +913,11 @@ define <vscale x 7 x double> @vp_round_nxv7f64_unmasked(<vscale x 7 x double> %v
 declare <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x double> @vp_round_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
 }
 
 define <vscale x 8 x double> @vp_round_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_nxv8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI31_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -1074,111 +928,11 @@ define <vscale x 8 x double> @vp_round_nxv8f64_unmasked(<vscale x 8 x double> %v
 declare <vscale x 16 x double> @llvm.vp.round.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_nxv16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v24, v0
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    srli a2, a1, 3
-; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfabs.v v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
-; CHECK-NEXT:    fsrmi a2, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    bltu a0, a1, .LBB32_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB32_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
   %v = call <vscale x 16 x double> @llvm.vp.round.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x double> %v
 }
 
 define <vscale x 16 x double> @vp_round_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_round_nxv16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI33_0)(a3)
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a2, 4
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    bltu a0, a1, .LBB33_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB33_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x double> @llvm.vp.round.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
index 6c93ae68344cb6d..29a5bbe73c23695 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
@@ -13,10 +13,11 @@ declare <vscale x 1 x half> @llvm.vp.roundeven.nxv1f16(<vscale x 1 x half>, <vsc
 define <vscale x 1 x half> @vp_roundeven_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv1f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
@@ -55,10 +56,11 @@ define <vscale x 1 x half> @vp_roundeven_nxv1f16(<vscale x 1 x half> %va, <vscal
 define <vscale x 1 x half> @vp_roundeven_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv1f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +99,11 @@ declare <vscale x 2 x half> @llvm.vp.roundeven.nxv2f16(<vscale x 2 x half>, <vsc
 define <vscale x 2 x half> @vp_roundeven_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv2f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
@@ -139,10 +142,11 @@ define <vscale x 2 x half> @vp_roundeven_nxv2f16(<vscale x 2 x half> %va, <vscal
 define <vscale x 2 x half> @vp_roundeven_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv2f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -181,10 +185,11 @@ declare <vscale x 4 x half> @llvm.vp.roundeven.nxv4f16(<vscale x 4 x half>, <vsc
 define <vscale x 4 x half> @vp_roundeven_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv4f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
@@ -225,10 +230,11 @@ define <vscale x 4 x half> @vp_roundeven_nxv4f16(<vscale x 4 x half> %va, <vscal
 define <vscale x 4 x half> @vp_roundeven_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv4f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -268,10 +274,11 @@ define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscal
 ; ZVFH-LABEL: vp_roundeven_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
@@ -313,10 +320,11 @@ define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscal
 define <vscale x 8 x half> @vp_roundeven_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv8f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v10, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -356,10 +364,11 @@ define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vs
 ; ZVFH-LABEL: vp_roundeven_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI8_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
@@ -401,10 +410,11 @@ define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vs
 define <vscale x 16 x half> @vp_roundeven_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv16f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI9_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI9_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -444,10 +454,11 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs
 ; ZVFH-LABEL: vp_roundeven_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI10_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
@@ -534,10 +545,11 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs
 define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv32f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI11_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI11_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vfabs.v v16, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v16, fa5
 ; ZVFH-NEXT:    fsrmi a0, 0
 ; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -845,41 +857,11 @@ define <vscale x 16 x float> @vp_roundeven_nxv16f32_unmasked(<vscale x 16 x floa
 declare <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x double> @vp_roundeven_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x double> %v
 }
 
 define <vscale x 1 x double> @vp_roundeven_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_nxv1f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -889,43 +871,11 @@ define <vscale x 1 x double> @vp_roundeven_nxv1f64_unmasked(<vscale x 1 x double
 declare <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x double> @vp_roundeven_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
 }
 
 define <vscale x 2 x double> @vp_roundeven_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_nxv2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -935,43 +885,11 @@ define <vscale x 2 x double> @vp_roundeven_nxv2f64_unmasked(<vscale x 2 x double
 declare <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x double> @vp_roundeven_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
 }
 
 define <vscale x 4 x double> @vp_roundeven_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_nxv4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -981,43 +899,11 @@ define <vscale x 4 x double> @vp_roundeven_nxv4f64_unmasked(<vscale x 4 x double
 declare <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
 
 define <vscale x 7 x double> @vp_roundeven_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_nxv7f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
 }
 
 define <vscale x 7 x double> @vp_roundeven_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_nxv7f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI29_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
@@ -1027,43 +913,11 @@ define <vscale x 7 x double> @vp_roundeven_nxv7f64_unmasked(<vscale x 7 x double
 declare <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x double> @vp_roundeven_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
 }
 
 define <vscale x 8 x double> @vp_roundeven_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_nxv8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI31_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -1074,111 +928,11 @@ define <vscale x 8 x double> @vp_roundeven_nxv8f64_unmasked(<vscale x 8 x double
 declare <vscale x 16 x double> @llvm.vp.roundeven.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_nxv16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v24, v0
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    srli a2, a1, 3
-; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfabs.v v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
-; CHECK-NEXT:    fsrmi a2, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    bltu a0, a1, .LBB32_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB32_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
   %v = call <vscale x 16 x double> @llvm.vp.roundeven.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x double> %v
 }
 
 define <vscale x 16 x double> @vp_roundeven_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundeven_nxv16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI33_0)(a3)
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a2, 0
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    bltu a0, a1, .LBB33_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB33_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x double> @llvm.vp.roundeven.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
index f35d70d6d470b37..446385e92781cfe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
@@ -13,10 +13,11 @@ declare <vscale x 1 x half> @llvm.vp.roundtozero.nxv1f16(<vscale x 1 x half>, <v
 define <vscale x 1 x half> @vp_roundtozero_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv1f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
@@ -55,10 +56,11 @@ define <vscale x 1 x half> @vp_roundtozero_nxv1f16(<vscale x 1 x half> %va, <vsc
 define <vscale x 1 x half> @vp_roundtozero_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv1f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 1
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -97,10 +99,11 @@ declare <vscale x 2 x half> @llvm.vp.roundtozero.nxv2f16(<vscale x 2 x half>, <v
 define <vscale x 2 x half> @vp_roundtozero_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv2f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
@@ -139,10 +142,11 @@ define <vscale x 2 x half> @vp_roundtozero_nxv2f16(<vscale x 2 x half> %va, <vsc
 define <vscale x 2 x half> @vp_roundtozero_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv2f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 1
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -181,10 +185,11 @@ declare <vscale x 4 x half> @llvm.vp.roundtozero.nxv4f16(<vscale x 4 x half>, <v
 define <vscale x 4 x half> @vp_roundtozero_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv4f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
@@ -225,10 +230,11 @@ define <vscale x 4 x half> @vp_roundtozero_nxv4f16(<vscale x 4 x half> %va, <vsc
 define <vscale x 4 x half> @vp_roundtozero_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv4f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFH-NEXT:    vfabs.v v9, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
 ; ZVFH-NEXT:    fsrmi a0, 1
 ; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
@@ -268,10 +274,11 @@ define <vscale x 8 x half> @vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vsc
 ; ZVFH-LABEL: vp_roundtozero_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
@@ -313,10 +320,11 @@ define <vscale x 8 x half> @vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vsc
 define <vscale x 8 x half> @vp_roundtozero_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv8f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfabs.v v10, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
 ; ZVFH-NEXT:    fsrmi a0, 1
 ; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
@@ -356,10 +364,11 @@ define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, <
 ; ZVFH-LABEL: vp_roundtozero_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI8_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
@@ -401,10 +410,11 @@ define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, <
 define <vscale x 16 x half> @vp_roundtozero_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv16f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI9_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI9_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vfabs.v v12, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
 ; ZVFH-NEXT:    fsrmi a0, 1
 ; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
@@ -444,10 +454,11 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, <
 ; ZVFH-LABEL: vp_roundtozero_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    lui a1, %hi(.LCPI10_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
 ; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
@@ -534,10 +545,11 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, <
 define <vscale x 32 x half> @vp_roundtozero_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv32f16_unmasked:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    lui a1, %hi(.LCPI11_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI11_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vfabs.v v16, v8
+; ZVFH-NEXT:    li a0, 25
+; ZVFH-NEXT:    slli a0, a0, 10
+; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vmflt.vf v0, v16, fa5
 ; ZVFH-NEXT:    fsrmi a0, 1
 ; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
@@ -845,41 +857,11 @@ define <vscale x 16 x float> @vp_roundtozero_nxv16f32_unmasked(<vscale x 16 x fl
 declare <vscale x 1 x double> @llvm.vp.roundtozero.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x double> @vp_roundtozero_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_nxv1f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 1 x double> @llvm.vp.roundtozero.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x double> %v
 }
 
 define <vscale x 1 x double> @vp_roundtozero_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_nxv1f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x double> @llvm.vp.roundtozero.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -889,43 +871,11 @@ define <vscale x 1 x double> @vp_roundtozero_nxv1f64_unmasked(<vscale x 1 x doub
 declare <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x double> @vp_roundtozero_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_nxv2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
 }
 
 define <vscale x 2 x double> @vp_roundtozero_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_nxv2f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v10, v8
-; CHECK-NEXT:    vmflt.vf v0, v10, fa5
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -935,43 +885,11 @@ define <vscale x 2 x double> @vp_roundtozero_nxv2f64_unmasked(<vscale x 2 x doub
 declare <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x double> @vp_roundtozero_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_nxv4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
 }
 
 define <vscale x 4 x double> @vp_roundtozero_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_nxv4f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8
-; CHECK-NEXT:    vmflt.vf v0, v12, fa5
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -981,43 +899,11 @@ define <vscale x 4 x double> @vp_roundtozero_nxv4f64_unmasked(<vscale x 4 x doub
 declare <vscale x 7 x double> @llvm.vp.roundtozero.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
 
 define <vscale x 7 x double> @vp_roundtozero_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_nxv7f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.roundtozero.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
 }
 
 define <vscale x 7 x double> @vp_roundtozero_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_nxv7f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI29_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x double> @llvm.vp.roundtozero.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
@@ -1027,43 +913,11 @@ define <vscale x 7 x double> @vp_roundtozero_nxv7f64_unmasked(<vscale x 7 x doub
 declare <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x double> @vp_roundtozero_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_nxv8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
 }
 
 define <vscale x 8 x double> @vp_roundtozero_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_nxv8f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI31_0)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8
-; CHECK-NEXT:    vmflt.vf v0, v16, fa5
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -1074,111 +928,11 @@ define <vscale x 8 x double> @vp_roundtozero_nxv8f64_unmasked(<vscale x 8 x doub
 declare <vscale x 16 x double> @llvm.vp.roundtozero.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x double> @vp_roundtozero_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_nxv16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v24, v0
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    srli a2, a1, 3
-; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfabs.v v8, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
-; CHECK-NEXT:    fsrmi a2, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    bltu a0, a1, .LBB32_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB32_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
   %v = call <vscale x 16 x double> @llvm.vp.roundtozero.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x double> %v
 }
 
 define <vscale x 16 x double> @vp_roundtozero_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vp_roundtozero_nxv16f64_unmasked:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI33_0)(a3)
-; CHECK-NEXT:    sltu a3, a0, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a2, 1
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    bltu a0, a1, .LBB33_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB33_2:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8
-; CHECK-NEXT:    vmflt.vf v0, v24, fa5
-; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
-; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x double> @llvm.vp.roundtozero.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll
index 1bc0ed4e7513f30..c333fbd49ba4cb0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll
@@ -24,18 +24,6 @@ define <vscale x 1 x double> @test1(<vscale x 1 x double> %a, <vscale x 1 x doub
 
 ; (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
 define <vscale x 1 x double> @test2(<vscale x 1 x double> %a, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: test2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI1_0)
-; CHECK-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vlse64.v v9, (a1), zero
-; CHECK-NEXT:    lui a1, %hi(.LCPI1_1)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI1_1)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfadd.vf v9, v9, fa5, v0.t
-; CHECK-NEXT:    vfmul.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    ret
   %elt.head1 = insertelement <vscale x 1 x double> poison, double 2.0, i32 0
   %c1 = shufflevector <vscale x 1 x double> %elt.head1, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
   %t = call <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %c1, <vscale x 1 x i1> %m, i32 %evl)
@@ -47,19 +35,6 @@ define <vscale x 1 x double> @test2(<vscale x 1 x double> %a, <vscale x 1 x i1>
 
 ; (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
 define <vscale x 1 x double> @test3(<vscale x 1 x double> %a, <vscale x 1 x double> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: test3:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI2_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI2_0)
-; CHECK-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vlse64.v v10, (a1), zero
-; CHECK-NEXT:    lui a1, %hi(.LCPI2_1)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI2_1)(a1)
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfmul.vf v10, v10, fa5, v0.t
-; CHECK-NEXT:    vfmadd.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
   %elt.head1 = insertelement <vscale x 1 x double> poison, double 2.0, i32 0
   %c1 = shufflevector <vscale x 1 x double> %elt.head1, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
   %t = call <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %c1, <vscale x 1 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index c5245451dc44091..af2d99c5ff9685c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -1019,12 +1019,12 @@ define half @vreduce_fmin_nxv10f16(<vscale x 10 x half> %v) {
 ; CHECK-LABEL: vreduce_fmin_nxv10f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    lui a1, %hi(.LCPI73_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI73_0)
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vlse16.v v12, (a1), zero
 ; CHECK-NEXT:    srli a0, a0, 2
 ; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    lui a2, 8
+; CHECK-NEXT:    addi a2, a2, -512
+; CHECK-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a2
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
 ; CHECK-NEXT:    vslideup.vx v10, v12, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
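
Side note, not part of the patch: the new sequence above builds (8 << 12) - 512 = 0x7e00 in an integer register and splats it with vmv.v.x in place of the old constant-pool load. Decoded as an IEEE half, 0x7e00 has an all-ones exponent and a nonzero mantissa, i.e. a quiet NaN, which (assuming the reduction follows minnum semantics) leaves the fmin result unchanged when used as padding. A standalone check of the arithmetic, assuming nothing beyond the asm shown:

// Standalone decode of the splat value built by "lui a2, 8; addi a2, a2, -512".
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t v = (8u << 12) - 512;                       // lui 8; addi -512
  std::printf("0x%04x\n", v);                          // prints "0x7e00"
  // IEEE half fields: exponent all ones + nonzero mantissa -> quiet NaN.
  std::printf("exp=0x%02x mant=0x%03x\n", (v >> 10) & 0x1f, v & 0x3ff);
  return 0;
}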
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 44a396ee29a8ab3..b39873af5f90024 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -125,13 +125,13 @@ define <vscale x 1 x double> @test4(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    beqz a1, .LBB3_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
-; CHECK-NEXT:    lui a1, %hi(.LCPI3_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI3_0)
+; CHECK-NEXT:    li a1, 1023
+; CHECK-NEXT:    slli a1, a1, 52
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vlse64.v v10, (a1), zero
-; CHECK-NEXT:    lui a1, %hi(.LCPI3_1)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI3_1)
-; CHECK-NEXT:    vlse64.v v11, (a1), zero
+; CHECK-NEXT:    vmv.v.x v10, a1
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    slli a1, a1, 62
+; CHECK-NEXT:    vmv.v.x v11, a1
 ; CHECK-NEXT:    vfadd.vv v10, v10, v11
 ; CHECK-NEXT:    lui a1, %hi(scratch)
 ; CHECK-NEXT:    addi a1, a1, %lo(scratch)
@@ -245,12 +245,12 @@ define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK-NEXT:    beqz a1, .LBB5_4
 ; CHECK-NEXT:  .LBB5_2: # %if.then4
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
-; CHECK-NEXT:    vlse64.v v9, (a0), zero
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_1)
-; CHECK-NEXT:    vlse64.v v10, (a0), zero
+; CHECK-NEXT:    li a0, 1023
+; CHECK-NEXT:    slli a0, a0, 52
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    slli a0, a0, 62
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vfadd.vv v9, v9, v10
 ; CHECK-NEXT:    lui a0, %hi(scratch)
 ; CHECK-NEXT:    addi a0, a0, %lo(scratch)
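
Another side note, not part of the patch: the li/slli pairs introduced in the two hunks above build the IEEE-754 double bit patterns 1023 << 52 and 1 << 62 directly in an integer register before the vmv.v.x, which decode to 1.0 and 2.0, presumably the same values the removed .LCPI constant-pool loads supplied. A quick standalone check:

// Standalone check that the integer sequences decode to the doubles 1.0 and 2.0.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint64_t a = 1023ULL << 52;       // li a1, 1023; slli a1, a1, 52
  uint64_t b = 1ULL << 62;          // li a1, 1;    slli a1, a1, 62
  double da, db;
  std::memcpy(&da, &a, sizeof(da));
  std::memcpy(&db, &b, sizeof(db));
  std::printf("%g %g\n", da, db);   // prints "1 2"
  return 0;
}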
diff --git a/llvm/test/CodeGen/RISCV/zfbfmin.ll b/llvm/test/CodeGen/RISCV/zfbfmin.ll
index b32e6dc0b14b5c0..24fecc52f06b42c 100644
--- a/llvm/test/CodeGen/RISCV/zfbfmin.ll
+++ b/llvm/test/CodeGen/RISCV/zfbfmin.ll
@@ -73,8 +73,9 @@ define bfloat @bfloat_load(ptr %a) nounwind {
 define bfloat @bfloat_imm() nounwind {
 ; CHECKIZFBFMIN-LABEL: bfloat_imm:
 ; CHECKIZFBFMIN:       # %bb.0:
-; CHECKIZFBFMIN-NEXT:    lui a0, %hi(.LCPI7_0)
-; CHECKIZFBFMIN-NEXT:    flh fa0, %lo(.LCPI7_0)(a0)
+; CHECKIZFBFMIN-NEXT:    lui a0, 4
+; CHECKIZFBFMIN-NEXT:    addi a0, a0, 64
+; CHECKIZFBFMIN-NEXT:    fmv.h.x fa0, a0
 ; CHECKIZFBFMIN-NEXT:    ret
   ret bfloat 3.0
 }
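
Not part of the patch: a standalone check that the new three-instruction sequence really encodes bfloat 3.0. bfloat16 is the high half of the single-precision encoding, so 3.0f = 0x40400000 gives 0x4040, which is exactly "lui a0, 4" (0x4000) plus "addi a0, a0, 64" before the fmv.h.x:

// Standalone check: bfloat 3.0 == 0x4040 == (4 << 12) + 64.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  float f = 3.0f;                        // exactly representable as bfloat16
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  uint16_t bf = bits >> 16;              // bfloat16 = top 16 bits of a float
  std::printf("0x%04x 0x%04x\n", bf, (4u << 12) + 64);  // both print "0x4040"
  return 0;
}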

>From 2f9e8b36d07c96eb0054b01518ac1a27979029d1 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Tue, 24 Oct 2023 14:00:38 -0700
Subject: [PATCH 3/4] Use generateInstSeq directly [nfc]

Since CompressionCost was not set, the new code is equivalent to the old.
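
For anyone double-checking the NFC claim: per the note above, with CompressionCost left unset the old getIntMatCost result reduces to an instruction count, so the check keeps the same shape, "instructions needed to build the integer" plus one fmv, compared against the build-ints budget. A minimal standalone sketch of that shape (Inst/InstSeq here are stand-ins, not the RISCVMatInt types):

// Sketch only; mirrors the check in the RISCVISelLowering.cpp hunk below.
#include <cstddef>
#include <vector>

struct Inst {};                      // stand-in for one integer-materialization op
using InstSeq = std::vector<Inst>;   // stand-in for RISCVMatInt::InstSeq

bool fpImmCheapEnough(const InstSeq &Seq, unsigned MaxBuildIntsCost) {
  return Seq.size() + 1 <= MaxBuildIntsCost;   // "+ 1" is the final fmv
}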
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1049532a8414f0c..98f7ccb20a1b061 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2025,10 +2025,10 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
 
   // Building an integer and then converting requires a fmv at the end of
   // the integer sequence.
-  const unsigned Cost =
-    1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
-                                   Subtarget.getFeatureBits());
-  return Cost <= Subtarget.getMaxBuildIntsCost();
+  int64_t Val = Imm.bitcastToAPInt().getSExtValue();
+  RISCVMatInt::InstSeq Seq =
+      RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
+  return Seq.size() + 1 <= Subtarget.getMaxBuildIntsCost();
 }
 
 // TODO: This is very conservative.

>From de38ca2276b48400d84913c07387a4a4d8291492 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Thu, 26 Oct 2023 13:40:31 -0700
Subject: [PATCH 4/4] Remove redundant --check-prefixes options

---
 llvm/test/CodeGen/RISCV/bfloat-imm.ll | 4 ++--
 llvm/test/CodeGen/RISCV/float-imm.ll  | 4 ++--
 llvm/test/CodeGen/RISCV/half-imm.ll   | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/bfloat-imm.ll b/llvm/test/CodeGen/RISCV/bfloat-imm.ll
index ed2c6f59f8fd576..b2a342ae9886957 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-imm.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-imm.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -verify-machineinstrs \
-; RUN:   -target-abi ilp32f < %s | FileCheck --check-prefixes=CHECK %s
+; RUN:   -target-abi ilp32f < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin -verify-machineinstrs \
-; RUN:   -target-abi lp64f < %s | FileCheck --check-prefixes=CHECK %s
+; RUN:   -target-abi lp64f < %s | FileCheck %s
 
 define bfloat @bfloat_imm() nounwind {
 ; CHECK-LABEL: bfloat_imm:
diff --git a/llvm/test/CodeGen/RISCV/float-imm.ll b/llvm/test/CodeGen/RISCV/float-imm.ll
index 4cbcca2937e3bf1..18ff64acda8b67b 100644
--- a/llvm/test/CodeGen/RISCV/float-imm.ll
+++ b/llvm/test/CodeGen/RISCV/float-imm.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
-; RUN:   -target-abi=ilp32f | FileCheck --check-prefixes=CHECK %s
+; RUN:   -target-abi=ilp32f | FileCheck %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
-; RUN:   -target-abi=lp64f | FileCheck --check-prefixes=CHECK %s
+; RUN:   -target-abi=lp64f | FileCheck %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zfinx -verify-machineinstrs < %s \
 ; RUN:   -target-abi=ilp32 | FileCheck --check-prefixes=CHECKZFINX,RV32ZFINX %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zfinx -verify-machineinstrs < %s \
diff --git a/llvm/test/CodeGen/RISCV/half-imm.ll b/llvm/test/CodeGen/RISCV/half-imm.ll
index b4ce2c7047d5128..7e1f20c6f979de0 100644
--- a/llvm/test/CodeGen/RISCV/half-imm.ll
+++ b/llvm/test/CodeGen/RISCV/half-imm.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+zfh -verify-machineinstrs \
-; RUN:   -target-abi ilp32f < %s | FileCheck --check-prefixes=CHECK %s
+; RUN:   -target-abi ilp32f < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \
-; RUN:   -target-abi lp64f < %s | FileCheck --check-prefixes=CHECK %s
+; RUN:   -target-abi lp64f < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zhinx -verify-machineinstrs \
 ; RUN:   -target-abi ilp32 < %s \
 ; RUN:   | FileCheck -check-prefix=RV32IZHINX %s
