[llvm] a0d80c2 - [RISCV] Generalize performFP_TO_INTCombine to vectors

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 18 02:53:44 PST 2023


Author: Luke Lau
Date: 2023-01-18T10:53:24Z
New Revision: a0d80c239840ec477dd23071e748e60205ffd6b7

URL: https://github.com/llvm/llvm-project/commit/a0d80c239840ec477dd23071e748e60205ffd6b7
DIFF: https://github.com/llvm/llvm-project/commit/a0d80c239840ec477dd23071e748e60205ffd6b7.diff

LOG: [RISCV] Generalize performFP_TO_INTCombine to vectors

As in the scalar domain, combine (fp_to_int (ftrunc X)) on scalable
and fixed-length vectors into a single vfcvt instruction.
For truncating rounds, use the static vfcvt.rtz rounding mode so that
no fsrmi/fsrm calls are needed; otherwise use the VFCVT_RM_ variants
to set the rounding mode dynamically.
Closes https://github.com/llvm/llvm-project/issues/56737

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D141599
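
For illustration, a minimal IR sketch of the two cases described above,
lifted from the functions exercised in
llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll; the expected RV64
output in the comments simply restates the updated checks in the diff
below, nothing here goes beyond what those tests already cover.

define <vscale x 1 x i64> @trunc_nxv1f64_to_si64(<vscale x 1 x double> %x) {
  ; fptosi of ftrunc now selects a single truncating conversion:
  ;   vsetvli a0, zero, e64, m1, ta, ma
  ;   vfcvt.rtz.x.f.v v8, v8
  %a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
  %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i64>
  ret <vscale x 1 x i64> %b
}

define <vscale x 1 x i64> @ceil_nxv1f64_to_si64(<vscale x 1 x double> %x) {
  ; Non-truncating rounds use the VFCVT_RM_ form, with the rounding mode
  ; set dynamically around the conversion:
  ;   vsetvli a0, zero, e64, m1, ta, ma
  ;   vmset.m v0
  ;   fsrmi a0, 3
  ;   vfcvt.x.f.v v8, v8, v0.t
  ;   fsrm a0
  %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
  %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i64>
  ret <vscale x 1 x i64> %b
}

declare <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double>)
declare <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double>)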

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
    llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
    llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
    llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
    llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 315c10887a83..de3dcc29b78c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1915,7 +1915,8 @@ getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
 
 // Gets the two common "VL" operands: an all-ones mask and the vector length.
 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
-// the vector type that it is contained in.
+// the vector type that the fixed-length vector is contained in. Otherwise if
+// VecVT is scalable, then ContainerVT should be the same as VecVT.
 static std::pair<SDValue, SDValue>
 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
                 const RISCVSubtarget &Subtarget) {
@@ -9555,15 +9556,9 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   MVT XLenVT = Subtarget.getXLenVT();
 
-  // Only handle XLen or i32 types. Other types narrower than XLen will
-  // eventually be legalized to XLenVT.
-  EVT VT = N->getValueType(0);
-  if (VT != MVT::i32 && VT != XLenVT)
-    return SDValue();
-
   SDValue Src = N->getOperand(0);
 
-  // Ensure the FP type is also legal.
+  // Ensure the FP type is legal.
   if (!TLI.isTypeLegal(Src.getValueType()))
     return SDValue();
 
@@ -9575,7 +9570,57 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
   if (FRM == RISCVFPRndMode::Invalid)
     return SDValue();
 
+  SDLoc DL(N);
   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+  EVT VT = N->getValueType(0);
+
+  if (VT.isVector() && TLI.isTypeLegal(VT)) {
+    MVT SrcVT = Src.getSimpleValueType();
+    MVT SrcContainerVT = SrcVT;
+    MVT ContainerVT = VT.getSimpleVT();
+    SDValue XVal = Src.getOperand(0);
+
+    // TODO: Support combining with widening and narrowing instructions
+    // For now only support conversions of the same bit size
+    if (VT.getScalarSizeInBits() != SrcVT.getScalarSizeInBits())
+      return SDValue();
+
+    // Make fixed-length vectors scalable first
+    if (SrcVT.isFixedLengthVector()) {
+      SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
+      XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
+      ContainerVT =
+          getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
+    }
+
+    auto [Mask, VL] =
+        getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
+
+    SDValue FpToInt;
+    if (FRM == RISCVFPRndMode::RTZ) {
+      // Use the dedicated trunc static rounding mode if we're truncating so we
+      // don't need to generate calls to fsrmi/fsrm
+      unsigned Opc =
+          IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
+      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
+    } else {
+      unsigned Opc =
+          IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
+      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
+                            DAG.getTargetConstant(FRM, DL, XLenVT), VL);
+    }
+
+    // If converted from fixed-length to scalable, convert back
+    if (VT.isFixedLengthVector())
+      FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
+
+    return FpToInt;
+  }
+
+  // Only handle XLen or i32 types. Other types narrower than XLen will
+  // eventually be legalized to XLenVT.
+  if (VT != MVT::i32 && VT != XLenVT)
+    return SDValue();
 
   unsigned Opc;
   if (VT == XLenVT)
@@ -9583,7 +9628,6 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
   else
     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
 
-  SDLoc DL(N);
   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
                                 DAG.getTargetConstant(FRM, DL, XLenVT));
   return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
@@ -11604,6 +11648,18 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
   case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M1_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M2_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M4_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_M8_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M8_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF2_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF4_MASK);
   case RISCV::PseudoVFCVT_RM_F_XU_V_M1_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M1_MASK);
   case RISCV::PseudoVFCVT_RM_F_XU_V_M2_MASK:
@@ -13218,7 +13274,9 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
   NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
   NODE_NAME_CASE(VFCVT_RM_X_F_VL)
+  NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
   NODE_NAME_CASE(VFCVT_X_F_VL)
+  NODE_NAME_CASE(VFCVT_XU_F_VL)
   NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
   NODE_NAME_CASE(SINT_TO_FP_VL)
   NODE_NAME_CASE(UINT_TO_FP_VL)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 91f1a0f2e343..7c77f9c2fe8b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -239,8 +239,10 @@ enum NodeType : unsigned {
   VFCVT_RTZ_X_F_VL,
   VFCVT_RTZ_XU_F_VL,
   VFCVT_X_F_VL,
+  VFCVT_XU_F_VL,
   VFROUND_NOEXCEPT_VL,
   VFCVT_RM_X_F_VL, // Has a rounding mode operand.
+  VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
   SINT_TO_FP_VL,
   UINT_TO_FP_VL,
   VFCVT_RM_F_XU_VL, // Has a rounding mode operand.

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index f2d22048babc..4d52730d1dd3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -5512,6 +5512,7 @@ defm PseudoVFCVT_X_F : VPseudoVCVTI_V;
 }
 defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V;
 defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_RM_XU_F : VPseudoVCVTI_RM_V;
 defm PseudoVFCVT_RM_X_F : VPseudoVCVTI_RM_V;
 defm PseudoVFROUND_NOEXCEPT : VPseudoVFROUND_NOEXCEPT_V;
 let Uses = [FRM] in {

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index bbb55f8ef257..8b26e6a2475b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -159,7 +159,9 @@ def SDT_RISCVVecCvtF2XOp_VL : SDTypeProfile<1, 4, [
 ]>;
 
 def riscv_vfcvt_rm_x_f_vl : SDNode<"RISCVISD::VFCVT_RM_X_F_VL", SDT_RISCVVecCvtF2XOp_VL>;
+def riscv_vfcvt_rm_xu_f_vl : SDNode<"RISCVISD::VFCVT_RM_XU_F_VL", SDT_RISCVVecCvtF2XOp_VL>;
 def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVFP2IOp_VL>;
+def riscv_vfcvt_xu_f_vl : SDNode<"RISCVISD::VFCVT_XU_F_VL", SDT_RISCVFP2IOp_VL>;
 def riscv_vfround_noexcept_vl: SDNode<"RISCVISD::VFROUND_NOEXCEPT_VL", SDT_RISCVFPUnOp_VL>;
 
 def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
@@ -1739,7 +1741,9 @@ foreach fvti = AllFloatVectors in {
 
   // 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
   defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl,  "PseudoVFCVT_RM_X_F_V">;
+  defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl,  "PseudoVFCVT_RM_XU_F_V">;
   defm : VPatConvertFP2IVL_V<riscv_vfcvt_x_f_vl, "PseudoVFCVT_X_F_V">;
+  defm : VPatConvertFP2IVL_V<riscv_vfcvt_xu_f_vl, "PseudoVFCVT_XU_F_V">;
   defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFCVT_RTZ_X_F_V">;
   defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_xu_f_vl, "PseudoVFCVT_RTZ_XU_F_V">;
   defm : VPatConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;

diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
index 7f8644edfef2..de270fce84ea 100644
--- a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
@@ -251,29 +251,13 @@ define <vscale x 1 x i32> @trunc_nxv1f64_to_ui32(<vscale x 1 x double> %x) {
 define <vscale x 1 x i64> @trunc_nxv1f64_to_si64(<vscale x 1 x double> %x) {
 ; RV32-LABEL: trunc_nxv1f64_to_si64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI6_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI6_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f64_to_si64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI6_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI6_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
@@ -284,29 +268,13 @@ define <vscale x 1 x i64> @trunc_nxv1f64_to_si64(<vscale x 1 x double> %x) {
 define <vscale x 1 x i64> @trunc_nxv1f64_to_ui64(<vscale x 1 x double> %x) {
 ; RV32-LABEL: trunc_nxv1f64_to_ui64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI7_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI7_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f64_to_ui64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI7_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI7_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
@@ -561,29 +529,13 @@ define <vscale x 4 x i32> @trunc_nxv4f64_to_ui32(<vscale x 4 x double> %x) {
 define <vscale x 4 x i64> @trunc_nxv4f64_to_si64(<vscale x 4 x double> %x) {
 ; RV32-LABEL: trunc_nxv4f64_to_si64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI14_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT:    vfabs.v v12, v8
-; RV32-NEXT:    vmflt.vf v0, v12, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f64_to_si64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI14_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI14_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT:    vfabs.v v12, v8
-; RV64-NEXT:    vmflt.vf v0, v12, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
@@ -594,29 +546,13 @@ define <vscale x 4 x i64> @trunc_nxv4f64_to_si64(<vscale x 4 x double> %x) {
 define <vscale x 4 x i64> @trunc_nxv4f64_to_ui64(<vscale x 4 x double> %x) {
 ; RV32-LABEL: trunc_nxv4f64_to_ui64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI15_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI15_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT:    vfabs.v v12, v8
-; RV32-NEXT:    vmflt.vf v0, v12, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f64_to_ui64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI15_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI15_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT:    vfabs.v v12, v8
-; RV64-NEXT:    vmflt.vf v0, v12, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
@@ -895,34 +831,20 @@ define <vscale x 1 x i32> @ceil_nxv1f64_to_ui32(<vscale x 1 x double> %x) {
 define <vscale x 1 x i64> @ceil_nxv1f64_to_si64(<vscale x 1 x double> %x) {
 ; RV32-LABEL: ceil_nxv1f64_to_si64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI22_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI22_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f64_to_si64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI22_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI22_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
   %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i64>
@@ -932,34 +854,20 @@ define <vscale x 1 x i64> @ceil_nxv1f64_to_si64(<vscale x 1 x double> %x) {
 define <vscale x 1 x i64> @ceil_nxv1f64_to_ui64(<vscale x 1 x double> %x) {
 ; RV32-LABEL: ceil_nxv1f64_to_ui64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI23_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f64_to_ui64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI23_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
   %b = fptoui <vscale x 1 x double> %a to <vscale x 1 x i64>
@@ -1237,34 +1145,20 @@ define <vscale x 4 x i32> @ceil_nxv4f64_to_ui32(<vscale x 4 x double> %x) {
 define <vscale x 4 x i64> @ceil_nxv4f64_to_si64(<vscale x 4 x double> %x) {
 ; RV32-LABEL: ceil_nxv4f64_to_si64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI30_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI30_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT:    vfabs.v v12, v8
-; RV32-NEXT:    vmflt.vf v0, v12, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f64_to_si64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI30_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI30_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT:    vfabs.v v12, v8
-; RV64-NEXT:    vmflt.vf v0, v12, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
   %b = fptosi <vscale x 4 x double> %a to <vscale x 4 x i64>
@@ -1274,34 +1168,20 @@ define <vscale x 4 x i64> @ceil_nxv4f64_to_si64(<vscale x 4 x double> %x) {
 define <vscale x 4 x i64> @ceil_nxv4f64_to_ui64(<vscale x 4 x double> %x) {
 ; RV32-LABEL: ceil_nxv4f64_to_ui64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI31_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI31_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT:    vfabs.v v12, v8
-; RV32-NEXT:    vmflt.vf v0, v12, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f64_to_ui64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI31_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI31_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT:    vfabs.v v12, v8
-; RV64-NEXT:    vmflt.vf v0, v12, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
   %b = fptoui <vscale x 4 x double> %a to <vscale x 4 x i64>

diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
index 1c3612651fe1..334bc29e2370 100644
--- a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
@@ -165,29 +165,13 @@ define <vscale x 1 x i16> @trunc_nxv1f32_to_ui16(<vscale x 1 x float> %x) {
 define <vscale x 1 x i32> @trunc_nxv1f32_to_si32(<vscale x 1 x float> %x) {
 ; RV32-LABEL: trunc_nxv1f32_to_si32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI4_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI4_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f32_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI4_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI4_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
@@ -198,29 +182,13 @@ define <vscale x 1 x i32> @trunc_nxv1f32_to_si32(<vscale x 1 x float> %x) {
 define <vscale x 1 x i32> @trunc_nxv1f32_to_ui32(<vscale x 1 x float> %x) {
 ; RV32-LABEL: trunc_nxv1f32_to_ui32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI5_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI5_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f32_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI5_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI5_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
@@ -459,29 +427,13 @@ define <vscale x 4 x i16> @trunc_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
 define <vscale x 4 x i32> @trunc_nxv4f32_to_si32(<vscale x 4 x float> %x) {
 ; RV32-LABEL: trunc_nxv4f32_to_si32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI12_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfabs.v v10, v8
-; RV32-NEXT:    vmflt.vf v0, v10, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f32_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI12_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI12_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfabs.v v10, v8
-; RV64-NEXT:    vmflt.vf v0, v10, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
@@ -492,29 +444,13 @@ define <vscale x 4 x i32> @trunc_nxv4f32_to_si32(<vscale x 4 x float> %x) {
 define <vscale x 4 x i32> @trunc_nxv4f32_to_ui32(<vscale x 4 x float> %x) {
 ; RV32-LABEL: trunc_nxv4f32_to_ui32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI13_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfabs.v v10, v8
-; RV32-NEXT:    vmflt.vf v0, v10, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f32_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI13_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI13_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfabs.v v10, v8
-; RV64-NEXT:    vmflt.vf v0, v10, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
@@ -769,34 +705,20 @@ define <vscale x 1 x i16> @ceil_nxv1f32_to_ui16(<vscale x 1 x float> %x) {
 define <vscale x 1 x i32> @ceil_nxv1f32_to_si32(<vscale x 1 x float> %x) {
 ; RV32-LABEL: ceil_nxv1f32_to_si32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI20_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI20_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f32_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI20_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI20_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
   %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i32>
@@ -806,34 +728,20 @@ define <vscale x 1 x i32> @ceil_nxv1f32_to_si32(<vscale x 1 x float> %x) {
 define <vscale x 1 x i32> @ceil_nxv1f32_to_ui32(<vscale x 1 x float> %x) {
 ; RV32-LABEL: ceil_nxv1f32_to_ui32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI21_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI21_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f32_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI21_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI21_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
   %b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i32>
@@ -1095,34 +1003,20 @@ define <vscale x 4 x i16> @ceil_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
 define <vscale x 4 x i32> @ceil_nxv4f32_to_si32(<vscale x 4 x float> %x) {
 ; RV32-LABEL: ceil_nxv4f32_to_si32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI28_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI28_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfabs.v v10, v8
-; RV32-NEXT:    vmflt.vf v0, v10, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f32_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI28_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI28_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfabs.v v10, v8
-; RV64-NEXT:    vmflt.vf v0, v10, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
   %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i32>
@@ -1132,34 +1026,20 @@ define <vscale x 4 x i32> @ceil_nxv4f32_to_si32(<vscale x 4 x float> %x) {
 define <vscale x 4 x i32> @ceil_nxv4f32_to_ui32(<vscale x 4 x float> %x) {
 ; RV32-LABEL: ceil_nxv4f32_to_ui32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI29_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI29_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfabs.v v10, v8
-; RV32-NEXT:    vmflt.vf v0, v10, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f32_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI29_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI29_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfabs.v v10, v8
-; RV64-NEXT:    vmflt.vf v0, v10, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
   %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i32>

diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
index e48887488428..8a249b4272a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
@@ -87,29 +87,13 @@ define <vscale x 1 x i8> @trunc_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
 define <vscale x 1 x i16> @trunc_nxv1f16_to_si16(<vscale x 1 x half> %x) {
 ; RV32-LABEL: trunc_nxv1f16_to_si16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI2_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI2_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f16_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI2_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI2_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> %x)
@@ -120,29 +104,13 @@ define <vscale x 1 x i16> @trunc_nxv1f16_to_si16(<vscale x 1 x half> %x) {
 define <vscale x 1 x i16> @trunc_nxv1f16_to_ui16(<vscale x 1 x half> %x) {
 ; RV32-LABEL: trunc_nxv1f16_to_ui16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI3_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f16_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI3_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> %x)
@@ -377,29 +345,13 @@ define <vscale x 4 x i8> @trunc_nxv4f16_to_ui8(<vscale x 4 x half> %x) {
 define <vscale x 4 x i16> @trunc_nxv4f16_to_si16(<vscale x 4 x half> %x) {
 ; RV32-LABEL: trunc_nxv4f16_to_si16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI10_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI10_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f16_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI10_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI10_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
@@ -410,29 +362,13 @@ define <vscale x 4 x i16> @trunc_nxv4f16_to_si16(<vscale x 4 x half> %x) {
 define <vscale x 4 x i16> @trunc_nxv4f16_to_ui16(<vscale x 4 x half> %x) {
 ; RV32-LABEL: trunc_nxv4f16_to_ui16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI11_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f16_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI11_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI11_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
@@ -675,34 +611,20 @@ define <vscale x 1 x i8> @ceil_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
 define <vscale x 1 x i16> @ceil_nxv1f16_to_si16(<vscale x 1 x half> %x) {
 ; RV32-LABEL: ceil_nxv1f16_to_si16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI18_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI18_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f16_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI18_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI18_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
   %b = fptosi <vscale x 1 x half> %a to <vscale x 1 x i16>
@@ -712,34 +634,20 @@ define <vscale x 1 x i16> @ceil_nxv1f16_to_si16(<vscale x 1 x half> %x) {
 define <vscale x 1 x i16> @ceil_nxv1f16_to_ui16(<vscale x 1 x half> %x) {
 ; RV32-LABEL: ceil_nxv1f16_to_ui16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI19_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI19_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f16_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI19_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI19_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
   %b = fptoui <vscale x 1 x half> %a to <vscale x 1 x i16>
@@ -997,34 +905,20 @@ define <vscale x 4 x i8> @ceil_nxv4f16_to_ui8(<vscale x 4 x half> %x) {
 define <vscale x 4 x i16> @ceil_nxv4f16_to_si16(<vscale x 4 x half> %x) {
 ; RV32-LABEL: ceil_nxv4f16_to_si16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI26_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI26_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v8, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f16_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI26_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI26_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v8, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
   %b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i16>
@@ -1034,34 +928,20 @@ define <vscale x 4 x i16> @ceil_nxv4f16_to_si16(<vscale x 4 x half> %x) {
 define <vscale x 4 x i16> @ceil_nxv4f16_to_ui16(<vscale x 4 x half> %x) {
 ; RV32-LABEL: ceil_nxv4f16_to_ui16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI27_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI27_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f16_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI27_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI27_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfcvt.xu.f.v v8, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
   %b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i16>


        

